#include "d3d9_device.h"

#include "d3d9_interface.h"
#include "d3d9_swapchain.h"
#include "d3d9_caps.h"
#include "d3d9_util.h"
#include "d3d9_texture.h"
#include "d3d9_buffer.h"
#include "d3d9_vertex_declaration.h"
#include "d3d9_shader.h"
#include "d3d9_query.h"
#include "d3d9_stateblock.h"
#include "d3d9_monitor.h"
#include "d3d9_spec_constants.h"
#include "d3d9_names.h"
#include "d3d9_format_helpers.h"

#include "../dxvk/dxvk_adapter.h"
#include "../dxvk/dxvk_instance.h"

#include "../util/util_bit.h"
#include "../util/util_math.h"

#include "d3d9_initializer.h"

#include <algorithm>
#include <cfloat>

// MSVC predefines _MSC_VER (with a leading underscore); the pragma lets
// the compiler know that this file touches the floating point environment.
#ifdef _MSC_VER
#pragma fenv_access (on)
#endif

namespace dxvk {

  // Creates the device wrapper around an already created DXVK device.
  //
  // Stores the creation parameters, determines the shader constant layouts,
  // creates the resource initializer and format helper, begins recording on
  // the CS thread, optionally sets up the FPU (unless D3DCREATE_FPU_PRESERVE
  // is given), creates the constant buffers and initializes the counter
  // behind GetAvailableTextureMem.
  D3D9DeviceEx::D3D9DeviceEx(
          D3D9InterfaceEx* pParent,
          D3D9Adapter*     pAdapter,
          D3DDEVTYPE       DeviceType,
          HWND             hFocusWindow,
          DWORD            BehaviorFlags,
          Rc<DxvkDevice>   dxvkDevice)
    : m_parent         ( pParent )
    , m_deviceType     ( DeviceType )
    , m_window         ( hFocusWindow )
    , m_behaviorFlags  ( BehaviorFlags )
    , m_adapter        ( pAdapter )
    , m_dxvkDevice     ( dxvkDevice )
    , m_shaderModules  ( new D3D9ShaderModuleSet )
    , m_d3d9Options    ( dxvkDevice, pParent->GetInstance()->config() )
    , m_multithread    ( BehaviorFlags & D3DCREATE_MULTITHREADED )
    , m_isSWVP         ( (BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) ? true : false )
    , m_csThread       ( dxvkDevice->createContext() )
    , m_csChunk        ( AllocCsChunk() ) {
    // If we can SWVP, then we use an extended constant set
    // as SWVP has many more slots available than HWVP.
    bool canSWVP = CanSWVP();
    DetermineConstantLayouts(canSWVP);

    if (canSWVP)
      Logger::info("D3D9DeviceEx: Using extended constant set for software vertex processing.");

    m_initializer = new D3D9Initializer(m_dxvkDevice);
    m_converter   = new D3D9FormatHelper(m_dxvkDevice);

    // Start recording the first command list and disable logic ops.
    EmitCs([
      cDevice = m_dxvkDevice
    ] (DxvkContext* ctx) {
      ctx->beginRecording(cDevice->createCommandList());

      DxvkLogicOpState loState;
      loState.enableLogicOp = VK_FALSE;
      loState.logicOp       = VK_LOGIC_OP_CLEAR;
      ctx->setLogicOpState(loState);
    });

    if (!(BehaviorFlags & D3DCREATE_FPU_PRESERVE))
      SetupFPU();

    m_dxsoOptions = DxsoOptions(this, m_d3d9Options);

    CreateConstantBuffers();

    m_availableMemory = DetermineInitialTextureMemory();
  }


  // Flushes pending work and waits for the CS thread before the
  // helper objects are destroyed, then waits for the device to go idle.
  D3D9DeviceEx::~D3D9DeviceEx() {
    Flush();
    SynchronizeCsThread();

    delete m_initializer;
    delete m_converter;

    m_dxvkDevice->waitForIdle(); // Sync Device
  }


  // Hands out IUnknown and IDirect3DDevice9 unconditionally, and
  // IDirect3DDevice9Ex only if the parent interface is extended.
  // Returns E_POINTER for a null ppvObject and E_NOINTERFACE otherwise.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::QueryInterface(REFIID riid, void** ppvObject) {
    if (ppvObject == nullptr)
      return E_POINTER;

    *ppvObject = nullptr;

    bool extended = m_parent->IsExtended()
                 && riid == __uuidof(IDirect3DDevice9Ex);

    if (riid == __uuidof(IUnknown)
     || riid == __uuidof(IDirect3DDevice9)
     || extended) {
      *ppvObject = ref(this);
      return S_OK;
    }

    // We want to ignore this if the extended device is queried and we weren't made extended.
    if (riid == __uuidof(IDirect3DDevice9Ex))
      return E_NOINTERFACE;

    Logger::warn("D3D9DeviceEx::QueryInterface: Unknown interface query");
    Logger::warn(str::format(riid));
    return E_NOINTERFACE;
  }


  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::TestCooperativeLevel() {
    // Equivalent of D3D11/DXGI present tests. We can always present.
    return D3D_OK;
  }


  // Reports the remaining texture memory counter, rounded down to
  // megabyte granularity and saturated to what fits into a UINT.
  UINT    STDMETHODCALLTYPE D3D9DeviceEx::GetAvailableTextureMem() {
    // This is not meant to be accurate.
    // The values are also wildly incorrect in d3d9... But some games rely
    // on this inaccurate value...

    // Clamp to megabyte range, as per spec.
    constexpr UINT range = 0xfff00000;

    // Can't have negative memory! Anything above the largest value
    // representable after masking must saturate rather than wrap
    // around when the 64-bit counter is narrowed to 32 bits.
    int64_t memory = std::max<int64_t>(m_availableMemory.load(), 0);
    memory = std::min<int64_t>(memory, int64_t(range));

    return UINT(memory) & range;
  }


  // No-op that always reports success.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EvictManagedResources() {
    return D3D_OK;
  }


  // Returns a new reference to the parent interface.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDirect3D(IDirect3D9** ppD3D9) {
    if (ppD3D9 == nullptr)
      return D3DERR_INVALIDCALL;

    *ppD3D9 = m_parent.ref();
    return D3D_OK;
  }


  // Forwards to the adapter using the device type given at creation.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDeviceCaps(D3DCAPS9* pCaps) {
    return m_adapter->GetDeviceCaps(m_deviceType, pCaps);
  }


  // Only swapchain index 0 (the implicit swapchain) is accepted.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDisplayMode(UINT iSwapChain, D3DDISPLAYMODE* pMode) {
    if (unlikely(iSwapChain != 0))
      return D3DERR_INVALIDCALL;

    return m_implicitSwapchain->GetDisplayMode(pMode);
  }


  // Writes back the parameters that were passed to the constructor.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetCreationParameters(D3DDEVICE_CREATION_PARAMETERS *pParameters) {
    if (pParameters == nullptr)
      return D3DERR_INVALIDCALL;

    pParameters->AdapterOrdinal = m_adapter->GetOrdinal();
    pParameters->BehaviorFlags  = m_behaviorFlags;
    pParameters->DeviceType     = m_deviceType;
    pParameters->hFocusWindow   = m_window;

    return D3D_OK;
  }


  // Sets the cursor image from an A8R8G8B8 surface.
  //
  // Returns D3DERR_INVALIDCALL for a null surface or any other format.
  // When the hardware cursor path is taken, the surface contents are
  // copied into a zero-initialized CursorBitmap and handed to m_cursor;
  // otherwise a warning is logged and D3D_OK is returned.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetCursorProperties(
          UINT               XHotSpot,
          UINT               YHotSpot,
          IDirect3DSurface9* pCursorBitmap) {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(pCursorBitmap == nullptr))
      return D3DERR_INVALIDCALL;

    auto* cursorTex = GetCommonTexture(pCursorBitmap);
    if (unlikely(cursorTex->Desc()->Format != D3D9Format::A8R8G8B8))
      return D3DERR_INVALIDCALL;

    uint32_t inputWidth  = cursorTex->Desc()->Width;
    uint32_t inputHeight = cursorTex->Desc()->Height;

    // Always use a hardware cursor when windowed.
    bool hwCursor  = m_presentParams.Windowed;

    // Always use a hardware cursor w/h <= 32 px
    hwCursor |= inputWidth  <= HardwareCursorWidth
             || inputHeight <= HardwareCursorHeight;

    if (hwCursor) {
      D3DLOCKED_BOX lockedBox;
      HRESULT hr = LockImage(cursorTex, 0, 0, &lockedBox, nullptr, D3DLOCK_READONLY);
      if (FAILED(hr))
        return hr;

      const uint8_t* data  = reinterpret_cast<const uint8_t*>(lockedBox.pBits);

      // Windows works with a stride of 128, lets respect that.
      // Copy data to the bitmap...
      CursorBitmap bitmap = { 0 };

      // Never read more bytes per row than one source row holds, and
      // never read more rows than the source surface has. Whatever is
      // not covered by the source keeps its zero initialization.
      const size_t copyPitch = std::min<size_t>(
        HardwareCursorPitch,
        size_t(inputWidth) * HardwareCursorFormatSize);
      const uint32_t copyRows = std::min<uint32_t>(
        HardwareCursorHeight,
        inputHeight);

      for (uint32_t h = 0; h < copyRows; h++)
        std::memcpy(&bitmap[h * HardwareCursorPitch], &data[h * lockedBox.RowPitch], copyPitch);

      UnlockImage(cursorTex, 0, 0);

      // Set this as our cursor.
      return m_cursor.SetHardwareCursor(XHotSpot, YHotSpot, bitmap);
    }

    // Software Cursor...
    Logger::warn("D3D9DeviceEx::SetCursorProperties: Software cursor not implemented.");

    return D3D_OK;
  }


  void    STDMETHODCALLTYPE D3D9DeviceEx::SetCursorPosition(int X, int Y, DWORD Flags) {
    D3D9DeviceLock lock = LockDevice();

    // I was not able to find an instance
    // where the cursor update was not immediate.

    // Fullscreen + Windowed seem to have the same
    // behaviour here.

    // Hence we ignore the flag D3DCURSOR_IMMEDIATE_UPDATE.

    m_cursor.UpdateCursor(X, Y);
  }


  // Forwards to the cursor object and returns its result.
  BOOL    STDMETHODCALLTYPE D3D9DeviceEx::ShowCursor(BOOL bShow) {
    D3D9DeviceLock lock = LockDevice();

    return m_cursor.ShowCursor(bShow);
  }


  // Forwards to CreateAdditionalSwapChainEx without a fullscreen display mode.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateAdditionalSwapChain(
          D3DPRESENT_PARAMETERS* pPresentationParameters,
          IDirect3DSwapChain9**  ppSwapChain) {
    return CreateAdditionalSwapChainEx(pPresentationParameters, nullptr, ppSwapChain);
  }


  // Returns a new reference to the implicit swapchain for index 0,
  // D3DERR_INVALIDCALL for a null output pointer or any other index.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetSwapChain(UINT iSwapChain, IDirect3DSwapChain9** pSwapChain) {
    D3D9DeviceLock lock = LockDevice();

    InitReturnPtr(pSwapChain);

    if (unlikely(pSwapChain == nullptr))
      return D3DERR_INVALIDCALL;

    // This only returns the implicit swapchain...
    if (unlikely(iSwapChain != 0))
      return D3DERR_INVALIDCALL;

    *pSwapChain = static_cast<IDirect3DSwapChain9*>(m_implicitSwapchain.ref());

    return D3D_OK;
  }


  UINT    STDMETHODCALLTYPE D3D9DeviceEx::GetNumberOfSwapChains() {
    // This only counts the implicit swapchain...
return 1;
  }


  // Resets the device: calls ResetSwapChain and then ResetState with the
  // new presentation parameters and returns the first failing HRESULT.
  // On success, pending work is flushed and the CS thread is synchronized.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Reset(D3DPRESENT_PARAMETERS* pPresentationParameters) {
    D3D9DeviceLock lock = LockDevice();

    HRESULT hr = ResetSwapChain(pPresentationParameters, nullptr);
    if (FAILED(hr))
      return hr;

    hr = ResetState(pPresentationParameters);
    if (FAILED(hr))
      return hr;

    Flush();
    SynchronizeCsThread();

    return D3D_OK;
  }


  // Forwards to PresentEx with a flags value of 0.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Present(
    const RECT*    pSourceRect,
    const RECT*    pDestRect,
          HWND     hDestWindowOverride,
    const RGNDATA* pDirtyRegion) {
    return PresentEx(
      pSourceRect,
      pDestRect,
      hDestWindowOverride,
      pDirtyRegion,
      0);
  }


  // Only swapchain index 0 is accepted; the request is
  // forwarded to the implicit swapchain.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetBackBuffer(
          UINT                iSwapChain,
          UINT                iBackBuffer,
          D3DBACKBUFFER_TYPE  Type,
          IDirect3DSurface9** ppBackBuffer) {
    InitReturnPtr(ppBackBuffer);

    if (unlikely(iSwapChain != 0))
      return D3DERR_INVALIDCALL;

    return m_implicitSwapchain->GetBackBuffer(iBackBuffer, Type, ppBackBuffer);
  }


  // Only swapchain index 0 is accepted; the request is
  // forwarded to the implicit swapchain.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRasterStatus(UINT iSwapChain, D3DRASTER_STATUS* pRasterStatus) {
    if (unlikely(iSwapChain != 0))
      return D3DERR_INVALIDCALL;

    return m_implicitSwapchain->GetRasterStatus(pRasterStatus);
  }


  // Forwards to the implicit swapchain.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDialogBoxMode(BOOL bEnableDialogs) {
    return m_implicitSwapchain->SetDialogBoxMode(bEnableDialogs);
  }


  // Silently ignored for any swapchain index other than 0.
  void    STDMETHODCALLTYPE D3D9DeviceEx::SetGammaRamp(
          UINT          iSwapChain,
          DWORD         Flags,
    const D3DGAMMARAMP* pRamp) {
    if (unlikely(iSwapChain != 0))
      return;

    m_implicitSwapchain->SetGammaRamp(Flags, pRamp);
  }


  // Silently ignored for any swapchain index other than 0,
  // in which case pRamp is left untouched.
  void    STDMETHODCALLTYPE D3D9DeviceEx::GetGammaRamp(UINT iSwapChain, D3DGAMMARAMP* pRamp) {
    if (unlikely(iSwapChain != 0))
      return;

    m_implicitSwapchain->GetGammaRamp(pRamp);
  }


  // Creates a 2D texture.
  //
  // Returns D3DERR_INVALIDCALL for a null output pointer or if
  // NormalizeTextureProperties rejects the description, and
  // D3DERR_OUTOFVIDEOMEMORY if creation throws a DxvkError.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateTexture(
          UINT                Width,
          UINT                Height,
          UINT                Levels,
          DWORD               Usage,
          D3DFORMAT           Format,
          D3DPOOL             Pool,
          IDirect3DTexture9** ppTexture,
          HANDLE*             pSharedHandle) {
    InitReturnPtr(ppTexture);

    if (unlikely(ppTexture == nullptr))
      return D3DERR_INVALIDCALL;

    D3D9_COMMON_TEXTURE_DESC desc;
    desc.Width              = Width;
    desc.Height             = Height;
    desc.Depth              = 1;
    desc.ArraySize          = 1;
    desc.MipLevels          = Levels;
    desc.Usage              = Usage;
    desc.Format             = EnumerateFormat(Format);
    desc.Pool               = Pool;
    desc.Discard            = FALSE;
    desc.MultiSample        = D3DMULTISAMPLE_NONE;
    desc.MultisampleQuality = 0;
    desc.IsBackBuffer       = FALSE;
    desc.IsAttachmentOnly   = FALSE;

    if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
      return D3DERR_INVALIDCALL;

    try {
      const Com texture = new D3D9Texture2D(this, &desc);

      // For single-level system memory textures, whatever pSharedHandle
      // points to is handed to the initializer as the initial data pointer.
      void* initialData = nullptr;

      if (Pool == D3DPOOL_SYSTEMMEM && Levels == 1 && pSharedHandle != nullptr)
        initialData = *(reinterpret_cast(pSharedHandle));

      m_initializer->InitTexture(texture->GetCommonTexture(), initialData);
      *ppTexture = texture.ref();
      return D3D_OK;
    }
    catch (const DxvkError& e) {
      Logger::err(e.message());
      return D3DERR_OUTOFVIDEOMEMORY;
    }
  }


  // Creates a volume (3D) texture. Same validation and error
  // mapping as CreateTexture; pSharedHandle is not used here.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVolumeTexture(
          UINT                      Width,
          UINT                      Height,
          UINT                      Depth,
          UINT                      Levels,
          DWORD                     Usage,
          D3DFORMAT                 Format,
          D3DPOOL                   Pool,
          IDirect3DVolumeTexture9** ppVolumeTexture,
          HANDLE*                   pSharedHandle) {
    InitReturnPtr(ppVolumeTexture);

    if (unlikely(ppVolumeTexture == nullptr))
      return D3DERR_INVALIDCALL;

    D3D9_COMMON_TEXTURE_DESC desc;
    desc.Width              = Width;
    desc.Height             = Height;
    desc.Depth              = Depth;
    desc.ArraySize          = 1;
    desc.MipLevels          = Levels;
    desc.Usage              = Usage;
    desc.Format             = EnumerateFormat(Format);
    desc.Pool               = Pool;
    desc.Discard            = FALSE;
    desc.MultiSample        = D3DMULTISAMPLE_NONE;
    desc.MultisampleQuality = 0;
    desc.IsBackBuffer       = FALSE;
    desc.IsAttachmentOnly   = FALSE;

    if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
      return D3DERR_INVALIDCALL;

    try {
      const Com texture = new D3D9Texture3D(this, &desc);
      m_initializer->InitTexture(texture->GetCommonTexture());
      *ppVolumeTexture = texture.ref();
      return D3D_OK;
    }
    catch (const DxvkError& e) {
      Logger::err(e.message());
      return D3DERR_OUTOFVIDEOMEMORY;
    }
  }


  // Creates a cube texture, described as a square 2D texture with six
  // array layers. Same validation and error mapping as CreateTexture;
  // pSharedHandle is not used here.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateCubeTexture(
          UINT                    EdgeLength,
          UINT                    Levels,
          DWORD                   Usage,
          D3DFORMAT               Format,
          D3DPOOL                 Pool,
          IDirect3DCubeTexture9** ppCubeTexture,
          HANDLE*                 pSharedHandle) {
    InitReturnPtr(ppCubeTexture);

    if (unlikely(ppCubeTexture == nullptr))
      return D3DERR_INVALIDCALL;

    D3D9_COMMON_TEXTURE_DESC desc;
    desc.Width              = EdgeLength;
    desc.Height             = EdgeLength;
    desc.Depth              = 1;
    desc.ArraySize          = 6; // A cube has 6 faces, wowwie!
    desc.MipLevels          = Levels;
    desc.Usage              = Usage;
    desc.Format             = EnumerateFormat(Format);
    desc.Pool               = Pool;
    desc.Discard            = FALSE;
    desc.MultiSample        = D3DMULTISAMPLE_NONE;
    desc.MultisampleQuality = 0;
    desc.IsBackBuffer       = FALSE;
    desc.IsAttachmentOnly   = FALSE;

    if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
      return D3DERR_INVALIDCALL;

    try {
      const Com texture = new D3D9TextureCube(this, &desc);
      m_initializer->InitTexture(texture->GetCommonTexture());
      *ppCubeTexture = texture.ref();
      return D3D_OK;
    }
    catch (const DxvkError& e) {
      Logger::err(e.message());
      return D3DERR_OUTOFVIDEOMEMORY;
    }
  }


  // Creates a vertex buffer.
  //
  // Returns D3DERR_INVALIDCALL for a null output pointer, if
  // ValidateBufferProperties rejects the description, or if
  // creation throws a DxvkError.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexBuffer(
          UINT                     Length,
          DWORD                    Usage,
          DWORD                    FVF,
          D3DPOOL                  Pool,
          IDirect3DVertexBuffer9** ppVertexBuffer,
          HANDLE*                  pSharedHandle) {
    InitReturnPtr(ppVertexBuffer);

    if (unlikely(ppVertexBuffer == nullptr))
      return D3DERR_INVALIDCALL;

    D3D9_BUFFER_DESC desc;
    desc.Format = D3D9Format::VERTEXDATA;
    desc.FVF    = FVF;
    desc.Pool   = Pool;
    desc.Size   = Length;
    desc.Type   = D3DRTYPE_VERTEXBUFFER;
    desc.Usage  = Usage;

    if (FAILED(D3D9CommonBuffer::ValidateBufferProperties(&desc)))
      return D3DERR_INVALIDCALL;

    try {
      const Com buffer = new D3D9VertexBuffer(this, &desc);
      m_initializer->InitBuffer(buffer->GetCommonBuffer());
      *ppVertexBuffer = buffer.ref();
      return D3D_OK;
    }
    catch (const DxvkError & e) {
      Logger::err(e.message());
      return D3DERR_INVALIDCALL;
    }
  }


  // Creates an index buffer. Same validation and error mapping as
  // CreateVertexBuffer; note that desc.FVF is not assigned here.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateIndexBuffer(
          UINT                    Length,
          DWORD                   Usage,
          D3DFORMAT               Format,
          D3DPOOL                 Pool,
          IDirect3DIndexBuffer9** ppIndexBuffer,
          HANDLE*                 pSharedHandle) {
    InitReturnPtr(ppIndexBuffer);

    if (unlikely(ppIndexBuffer == nullptr))
      return D3DERR_INVALIDCALL;

    D3D9_BUFFER_DESC desc;
    desc.Format = EnumerateFormat(Format);
    desc.Pool   = Pool;
    desc.Size   = Length;
    desc.Type   = D3DRTYPE_INDEXBUFFER;
    desc.Usage  = Usage;

    if (FAILED(D3D9CommonBuffer::ValidateBufferProperties(&desc)))
      return D3DERR_INVALIDCALL;

    try {
      const Com buffer = new D3D9IndexBuffer(this, &desc);
      m_initializer->InitBuffer(buffer->GetCommonBuffer());
      *ppIndexBuffer = buffer.ref();
      return D3D_OK;
    }
    catch (const DxvkError & e) {
      Logger::err(e.message());
      return D3DERR_INVALIDCALL;
    }
  }


  // Forwards to CreateRenderTargetEx with a usage value of 0.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateRenderTarget(
          UINT                Width,
          UINT                Height,
          D3DFORMAT           Format,
          D3DMULTISAMPLE_TYPE MultiSample,
          DWORD               MultisampleQuality,
          BOOL                Lockable,
          IDirect3DSurface9** ppSurface,
          HANDLE*             pSharedHandle) {
    return CreateRenderTargetEx(
      Width,
      Height,
      Format,
      MultiSample,
      MultisampleQuality,
      Lockable,
      ppSurface,
      pSharedHandle,
      0);
  }


  // Forwards to CreateDepthStencilSurfaceEx with a usage value of 0.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateDepthStencilSurface(
          UINT                Width,
          UINT                Height,
          D3DFORMAT           Format,
          D3DMULTISAMPLE_TYPE MultiSample,
          DWORD               MultisampleQuality,
          BOOL                Discard,
          IDirect3DSurface9** ppSurface,
          HANDLE*             pSharedHandle) {
    return CreateDepthStencilSurfaceEx(
      Width,
      Height,
      Format,
      MultiSample,
      MultisampleQuality,
      Discard,
      ppSurface,
      pSharedHandle,
      0);
  }


  // Copies a region of a D3DPOOL_SYSTEMMEM surface to a D3DPOOL_DEFAULT
  // surface of the same format.
  //
  // Returns D3DERR_INVALIDCALL for null surfaces, mismatching pools or
  // formats, or a source rect / destination point that is negative,
  // empty or not aligned to the format's block size.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::UpdateSurface(
          IDirect3DSurface9* pSourceSurface,
    const RECT*              pSourceRect,
          IDirect3DSurface9* pDestinationSurface,
    const POINT*             pDestPoint) {
    D3D9DeviceLock lock = LockDevice();

    D3D9Surface* src = static_cast(pSourceSurface);
    D3D9Surface* dst = static_cast(pDestinationSurface);

    if (unlikely(src == nullptr || dst == nullptr))
      return D3DERR_INVALIDCALL;

    D3D9CommonTexture* srcTextureInfo = src->GetCommonTexture();
    D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture();

    if (unlikely(srcTextureInfo->Desc()->Pool != D3DPOOL_SYSTEMMEM || dstTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT))
      return D3DERR_INVALIDCALL;

    if (unlikely(srcTextureInfo->Desc()->Format != dstTextureInfo->Desc()->Format))
      return D3DERR_INVALIDCALL;

    const DxvkFormatInfo*
formatInfo = imageFormatInfo(dstTextureInfo->GetFormatMapping().FormatColor);

    // Source offset is tracked in blocks, destination offset in pixels.
    VkOffset3D srcBlockOffset = { 0u, 0u, 0u };
    VkOffset3D dstOffset      = { 0u, 0u, 0u };
    VkExtent3D texLevelExtent = srcTextureInfo->GetExtentMip(src->GetSubresource());
    VkExtent3D texLevelBlockCount = util::computeBlockCount(texLevelExtent, formatInfo->blockSize);

    // Without a source rect the whole level is copied.
    VkExtent3D copyExtent     = texLevelExtent;

    if (pSourceRect != nullptr) {
      const VkExtent3D extent = { uint32_t(pSourceRect->right - pSourceRect->left), uint32_t(pSourceRect->bottom - pSourceRect->top), 1 };

      const bool extentAligned = extent.width  % formatInfo->blockSize.width  == 0
                              && extent.height % formatInfo->blockSize.height == 0;

      // The rect must be non-negative, non-empty and start on a block
      // boundary; its size must be block aligned unless it matches the
      // extent of the whole level.
      if (pSourceRect->left < 0
       || pSourceRect->top  < 0
       || pSourceRect->right  <= pSourceRect->left
       || pSourceRect->bottom <= pSourceRect->top
       || pSourceRect->left % formatInfo->blockSize.width  != 0
       || pSourceRect->top  % formatInfo->blockSize.height != 0
       || (extent != texLevelExtent && !extentAligned))
        return D3DERR_INVALIDCALL;

      srcBlockOffset = { pSourceRect->left / int32_t(formatInfo->blockSize.width),
                         pSourceRect->top  / int32_t(formatInfo->blockSize.height),
                         0u };

      copyExtent = { extent.width,
                     extent.height,
                     1u };
    }

    if (pDestPoint != nullptr) {
      if (pDestPoint->x % formatInfo->blockSize.width  != 0
       || pDestPoint->y % formatInfo->blockSize.height != 0
       || pDestPoint->x < 0
       || pDestPoint->y < 0)
        return D3DERR_INVALIDCALL;

      dstOffset = { pDestPoint->x,
                    pDestPoint->y,
                    0u };
    }

    VkExtent3D copyBlockCount = util::computeBlockCount(copyExtent, formatInfo->blockSize);

    const auto dstSubresource = vk::makeSubresourceLayers(
      dstTextureInfo->GetSubresourceFromIndex(VK_IMAGE_ASPECT_COLOR_BIT, dst->GetSubresource()));

    DxvkBufferSliceHandle srcSlice = srcTextureInfo->GetMappedSlice(src->GetSubresource());

    // Temporary buffer sized for exactly the copied blocks.
    VkDeviceSize dirtySize = copyBlockCount.width * copyBlockCount.height * formatInfo->elementSize;
    D3D9BufferSlice slice = AllocTempBuffer(dirtySize);

    // Byte offset of the first copied block, computed from block counts.
    VkDeviceSize copySrcOffset = (srcBlockOffset.z * texLevelBlockCount.height * texLevelBlockCount.width
                                + srcBlockOffset.y * texLevelBlockCount.width
                                + srcBlockOffset.x)
                                * formatInfo->elementSize;

    // Source row pitch: block row size rounded up via align(..., 4).
    VkDeviceSize pitch = align(texLevelBlockCount.width * formatInfo->elementSize, 4);
    void* srcData = reinterpret_cast(srcSlice.mapPtr) + copySrcOffset;
    util::packImageData(
      slice.mapPtr, srcData, copyBlockCount, formatInfo->elementSize,
      pitch, pitch * texLevelBlockCount.height);

    Rc dstImage = dstTextureInfo->GetImage();

    EmitCs([
      cDstImage   = std::move(dstImage),
      cSrcSlice   = slice.slice,
      cDstLayers  = dstSubresource,
      cDstOffset  = dstOffset,
      cCopyExtent = copyExtent
    ] (DxvkContext* ctx) {
      ctx->copyBufferToImage(
        cDstImage, cDstLayers,
        cDstOffset, cCopyExtent,
        cSrcSlice.buffer(), cSrcSlice.offset(),
        0, 0);
    });

    dstTextureInfo->SetWrittenByGPU(dst->GetSubresource(), true);

    if (dstTextureInfo->IsAutomaticMip())
      MarkTextureMipsDirty(dstTextureInfo);

    return D3D_OK;
  }


  // Uploads the dirty box of every array slice of a D3DPOOL_SYSTEMMEM
  // texture to a D3DPOOL_DEFAULT texture, then clears the source's
  // dirty boxes.
  //
  // Returns D3DERR_INVALIDCALL for null textures, mismatching pools, or
  // when the source has automatic mip generation but the destination
  // does not.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::UpdateTexture(
          IDirect3DBaseTexture9* pSourceTexture,
          IDirect3DBaseTexture9* pDestinationTexture) {
    D3D9DeviceLock lock = LockDevice();

    if (!pDestinationTexture || !pSourceTexture)
      return D3DERR_INVALIDCALL;

    D3D9CommonTexture* dstTexInfo = GetCommonTexture(pDestinationTexture);
    D3D9CommonTexture* srcTexInfo = GetCommonTexture(pSourceTexture);

    if (unlikely(srcTexInfo->Desc()->Pool != D3DPOOL_SYSTEMMEM || dstTexInfo->Desc()->Pool != D3DPOOL_DEFAULT))
      return D3DERR_INVALIDCALL;

    const Rc dstImage  = dstTexInfo->GetImage();
    const DxvkFormatInfo* formatInfo = imageFormatInfo(dstTexInfo->GetFormatMapping().FormatColor);

    // Only the levels and slices both textures have are copied.
    uint32_t mipLevels   = std::min(srcTexInfo->Desc()->MipLevels, dstTexInfo->Desc()->MipLevels);
    uint32_t arraySlices = std::min(srcTexInfo->Desc()->ArraySize, dstTexInfo->Desc()->ArraySize);

    if (unlikely(srcTexInfo->IsAutomaticMip() && !dstTexInfo->IsAutomaticMip()))
      return D3DERR_INVALIDCALL;

    // With automatic mip generation only the top level is uploaded.
    if (dstTexInfo->IsAutomaticMip())
      mipLevels = 1;

    for (uint32_t a = 0; a < arraySlices; a++) {
      const D3DBOX& box = srcTexInfo->GetDirtyBox(a);

      // Skip slices whose dirty box is empty.
      if (box.Left >= box.Right || box.Top >= box.Bottom || box.Front >= box.Back)
        continue;

      for (uint32_t m = 0; m < mipLevels; m++) {
        VkImageSubresourceLayers dstLayers = { VK_IMAGE_ASPECT_COLOR_BIT, m, a, 1 };

        // Dirty box origin scaled to mip m and aligned down to the block size.
        VkOffset3D scaledBoxOffset = {
          int32_t(alignDown(box.Left  >> m, formatInfo->blockSize.width)),
          int32_t(alignDown(box.Top   >> m, formatInfo->blockSize.height)),
          int32_t(alignDown(box.Front >> m, formatInfo->blockSize.depth))
        };

        // Dirty box size measured from the aligned origin, scaled to mip m.
        VkExtent3D scaledBoxExtent = util::computeMipLevelExtent({
          uint32_t(box.Right  - int32_t(alignDown(box.Left,  formatInfo->blockSize.width))),
          uint32_t(box.Bottom - int32_t(alignDown(box.Top,   formatInfo->blockSize.height))),
          uint32_t(box.Back   - int32_t(alignDown(box.Front, formatInfo->blockSize.depth)))
        }, m);

        VkExtent3D scaledBoxExtentBlockCount = util::computeBlockCount(scaledBoxExtent, formatInfo->blockSize);
        VkExtent3D scaledAlignedBoxExtent    = util::computeBlockExtent(scaledBoxExtentBlockCount, formatInfo->blockSize);

        VkExtent3D texLevelExtent            = dstImage->mipLevelExtent(m);
        VkExtent3D texLevelExtentBlockCount  = util::computeBlockCount(texLevelExtent, formatInfo->blockSize);

        // Keep the copied region inside the destination mip level.
        scaledAlignedBoxExtent.width  = std::min(texLevelExtent.width  - scaledBoxOffset.x, scaledAlignedBoxExtent.width);
        scaledAlignedBoxExtent.height = std::min(texLevelExtent.height - scaledBoxOffset.y, scaledAlignedBoxExtent.height);
        scaledAlignedBoxExtent.depth  = std::min(texLevelExtent.depth  - scaledBoxOffset.z, scaledAlignedBoxExtent.depth);

        VkDeviceSize dirtySize = scaledBoxExtentBlockCount.width * scaledBoxExtentBlockCount.height * scaledBoxExtentBlockCount.depth * formatInfo->elementSize;
        D3D9BufferSlice slice = AllocTempBuffer(dirtySize);
        VkOffset3D boxOffsetBlockCount = util::computeBlockOffset(scaledBoxOffset, formatInfo->blockSize);

        // Byte offset of the first copied block, computed from block counts.
        VkDeviceSize copySrcOffset = (boxOffsetBlockCount.z * texLevelExtentBlockCount.height * texLevelExtentBlockCount.width
                                    + boxOffsetBlockCount.y * texLevelExtentBlockCount.width
                                    + boxOffsetBlockCount.x)
                                    * formatInfo->elementSize;

        // Source row pitch: block row size rounded up via align(..., 4).
        VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4);
        void* srcData = reinterpret_cast(srcTexInfo->GetMappedSlice(srcTexInfo->CalcSubresource(a, m)).mapPtr) + copySrcOffset;
        util::packImageData(
          slice.mapPtr, srcData, scaledBoxExtentBlockCount, formatInfo->elementSize,
          pitch, pitch * texLevelExtentBlockCount.height);

        scaledAlignedBoxExtent.width  = std::min(texLevelExtent.width,  scaledAlignedBoxExtent.width);
        scaledAlignedBoxExtent.height = std::min(texLevelExtent.height, scaledAlignedBoxExtent.height);
        scaledAlignedBoxExtent.depth  = std::min(texLevelExtent.depth,  scaledAlignedBoxExtent.depth);

        EmitCs([
          cDstImage  = dstImage,
          cSrcSlice  = slice.slice,
          cDstLayers = dstLayers,
          cExtent    = scaledAlignedBoxExtent,
          cOffset    = scaledBoxOffset
        ] (DxvkContext* ctx) {
          ctx->copyBufferToImage(
            cDstImage,  cDstLayers,
            cOffset, cExtent,
            cSrcSlice.buffer(), cSrcSlice.offset(),
            0, 0);
        });

        dstTexInfo->SetWrittenByGPU(dstTexInfo->CalcSubresource(a, m), true);
      }
    }

    srcTexInfo->ClearDirtyBoxes();

    // Only part of the mip chain was uploaded; the rest needs regenerating.
    if (dstTexInfo->IsAutomaticMip() && mipLevels != dstTexInfo->Desc()->MipLevels)
      MarkTextureMipsDirty(dstTexInfo);

    FlushImplicit(false);

    return D3D_OK;
  }


  // Copies the contents of a render target into a destination surface.
  //
  // Returns D3DERR_INVALIDCALL for null surfaces or mismatching formats
  // and D3D_OK without doing anything if both surfaces are the same.
  // A D3DPOOL_DEFAULT destination is handled by StretchRect instead.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderTargetData(
          IDirect3DSurface9* pRenderTarget,
          IDirect3DSurface9* pDestSurface) {
    D3D9DeviceLock lock = LockDevice();

    D3D9Surface* src = static_cast(pRenderTarget);
    D3D9Surface* dst = static_cast(pDestSurface);

    if (unlikely(src == nullptr || dst == nullptr))
      return D3DERR_INVALIDCALL;

    if (pRenderTarget == pDestSurface)
      return D3D_OK;

    D3D9CommonTexture* dstTexInfo = GetCommonTexture(dst);
    D3D9CommonTexture* srcTexInfo = GetCommonTexture(src);

    if (srcTexInfo->Desc()->Format != dstTexInfo->Desc()->Format)
      return D3DERR_INVALIDCALL;

    if (dstTexInfo->Desc()->Pool == D3DPOOL_DEFAULT)
      return this->StretchRect(pRenderTarget, nullptr, pDestSurface, nullptr, D3DTEXF_NONE);

    Rc dstBuffer = dstTexInfo->GetBuffer(dst->GetSubresource());

    Rc srcImage = srcTexInfo->GetImage();
    const DxvkFormatInfo*
srcFormatInfo = imageFormatInfo(srcImage->info().format);

    const VkImageSubresource srcSubresource = srcTexInfo->GetSubresourceFromIndex(srcFormatInfo->aspectMask, src->GetSubresource());
    VkImageSubresourceLayers srcSubresourceLayers = {
      srcSubresource.aspectMask,
      srcSubresource.mipLevel,
      srcSubresource.arrayLayer, 1 };

    VkExtent3D srcExtent = srcTexInfo->GetExtentMip(src->GetMipLevel());

    // NOTE(review): pitch, pitchBlocks and dstExtent (captured below as
    // cDstExtent) are computed but never read by the emitted command.
    VkExtent3D texLevelExtentBlockCount = util::computeBlockCount(srcExtent, srcFormatInfo->blockSize);
    VkDeviceSize pitch = align(texLevelExtentBlockCount.width * uint32_t(srcFormatInfo->elementSize), 4);
    uint32_t pitchBlocks = uint32_t(pitch / srcFormatInfo->elementSize);
    VkExtent2D dstExtent = VkExtent2D{ pitchBlocks,
                                       texLevelExtentBlockCount.height * pitchBlocks };

    EmitCs([
      cBuffer       = dstBuffer,
      cImage        = srcImage,
      cSubresources = srcSubresourceLayers,
      cLevelExtent  = srcExtent,
      cDstExtent    = dstExtent
    ] (DxvkContext* ctx) {
      ctx->copyImageToBuffer(cBuffer, 0, 4, 0,
        cImage, cSubresources, VkOffset3D { 0, 0, 0 },
        cLevelExtent);
    });

    dstTexInfo->SetWrittenByGPU(dst->GetSubresource(), true);

    return D3D_OK;
  }


  // Only swapchain index 0 is accepted; the request is
  // forwarded to the implicit swapchain.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFrontBufferData(UINT iSwapChain, IDirect3DSurface9* pDestSurface) {
    if (unlikely(iSwapChain != 0))
      return D3DERR_INVALIDCALL;

    return m_implicitSwapchain->GetFrontBufferData(pDestSurface);
  }


  // Copies, resolves or blits a region between two D3DPOOL_DEFAULT
  // surfaces.
  //
  // A plain image copy (the "fast path") is used only if the formats are
  // similar, the destination is not multisampled, the rects are block
  // aligned and no stretching is involved. A multisampled source is
  // resolved; everything else goes through a blit.
  //
  // Returns D3DERR_INVALIDCALL for null or identical surfaces, surfaces
  // outside D3DPOOL_DEFAULT, depth/stencil sources while in a scene,
  // invalid regions, or compressed destinations on the blit/resolve path.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::StretchRect(
          IDirect3DSurface9*   pSourceSurface,
    const RECT*                pSourceRect,
          IDirect3DSurface9*   pDestSurface,
    const RECT*                pDestRect,
          D3DTEXTUREFILTERTYPE Filter) {
    D3D9DeviceLock lock = LockDevice();

    D3D9Surface* dst = static_cast(pDestSurface);
    D3D9Surface* src = static_cast(pSourceSurface);

    if (unlikely(src == nullptr || dst == nullptr))
      return D3DERR_INVALIDCALL;

    if (unlikely(src == dst))
      return D3DERR_INVALIDCALL;

    // Starts out true and is cleared by every condition
    // that rules out a plain image copy.
    bool fastPath = true;

    D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture();
    D3D9CommonTexture* srcTextureInfo = src->GetCommonTexture();

    if (unlikely(dstTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT ||
                 srcTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT))
      return D3DERR_INVALIDCALL;

    Rc dstImage = dstTextureInfo->GetImage();
    Rc srcImage = srcTextureInfo->GetImage();

    const DxvkFormatInfo* dstFormatInfo = imageFormatInfo(dstImage->info().format);
    const DxvkFormatInfo* srcFormatInfo = imageFormatInfo(srcImage->info().format);

    const VkImageSubresource dstSubresource = dstTextureInfo->GetSubresourceFromIndex(dstFormatInfo->aspectMask, dst->GetSubresource());
    const VkImageSubresource srcSubresource = srcTextureInfo->GetSubresourceFromIndex(srcFormatInfo->aspectMask, src->GetSubresource());

    // Depth/stencil sources are rejected while the InScene flag is set.
    if (unlikely((srcSubresource.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) && m_flags.test(D3D9DeviceFlag::InScene)))
      return D3DERR_INVALIDCALL;

    VkExtent3D srcExtent = srcImage->mipLevelExtent(srcSubresource.mipLevel);
    VkExtent3D dstExtent = dstImage->mipLevelExtent(dstSubresource.mipLevel);

    D3D9Format srcFormat = srcTextureInfo->Desc()->Format;
    D3D9Format dstFormat = dstTextureInfo->Desc()->Format;

    // We may only fast path copy non identicals one way!
    // We don't know what garbage could be in the X8 data.
    bool similar = AreFormatsSimilar(srcFormat, dstFormat);

    // Copies are only supported on similar formats.
    fastPath &= similar;

    // Copies are only supported if the sample count matches,
    // otherwise we need to resolve.
    bool needsResolve = srcImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT;
    bool fbBlit       = dstImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT;
    fastPath &= !fbBlit;

    // Copies would only work if we are block aligned.
if (pSourceRect != nullptr) {
      fastPath       &=  (pSourceRect->left   % srcFormatInfo->blockSize.width  == 0);
      fastPath       &=  (pSourceRect->right  % srcFormatInfo->blockSize.width  == 0);
      fastPath       &=  (pSourceRect->top    % srcFormatInfo->blockSize.height == 0);
      fastPath       &=  (pSourceRect->bottom % srcFormatInfo->blockSize.height == 0);
    }

    if (pDestRect != nullptr) {
      fastPath       &=  (pDestRect->left     % dstFormatInfo->blockSize.width  == 0);
      fastPath       &=  (pDestRect->top      % dstFormatInfo->blockSize.height == 0);
    }

    VkImageSubresourceLayers dstSubresourceLayers = {
      dstSubresource.aspectMask,
      dstSubresource.mipLevel,
      dstSubresource.arrayLayer, 1 };

    VkImageSubresourceLayers srcSubresourceLayers = {
      srcSubresource.aspectMask,
      srcSubresource.mipLevel,
      srcSubresource.arrayLayer, 1 };

    // Describe both regions as a blit; a missing rect
    // selects the whole mip level of that surface.
    VkImageBlit blitInfo;
    blitInfo.dstSubresource = dstSubresourceLayers;
    blitInfo.srcSubresource = srcSubresourceLayers;

    blitInfo.dstOffsets[0] = pDestRect != nullptr
      ? VkOffset3D{ int32_t(pDestRect->left), int32_t(pDestRect->top), 0 }
      : VkOffset3D{ 0,                        0,                       0 };

    blitInfo.dstOffsets[1] = pDestRect != nullptr
      ? VkOffset3D{ int32_t(pDestRect->right), int32_t(pDestRect->bottom), 1 }
      : VkOffset3D{ int32_t(dstExtent.width),  int32_t(dstExtent.height),  1 };

    blitInfo.srcOffsets[0] = pSourceRect != nullptr
      ? VkOffset3D{ int32_t(pSourceRect->left), int32_t(pSourceRect->top), 0 }
      : VkOffset3D{ 0,                          0,                         0 };

    blitInfo.srcOffsets[1] = pSourceRect != nullptr
      ? VkOffset3D{ int32_t(pSourceRect->right), int32_t(pSourceRect->bottom), 1 }
      : VkOffset3D{ int32_t(srcExtent.width),    int32_t(srcExtent.height),    1 };

    if (unlikely(IsBlitRegionInvalid(blitInfo.srcOffsets, srcExtent)))
      return D3DERR_INVALIDCALL;

    if (unlikely(IsBlitRegionInvalid(blitInfo.dstOffsets, dstExtent)))
      return D3DERR_INVALIDCALL;

    VkExtent3D srcCopyExtent =
    { uint32_t(blitInfo.srcOffsets[1].x - blitInfo.srcOffsets[0].x),
      uint32_t(blitInfo.srcOffsets[1].y - blitInfo.srcOffsets[0].y),
      uint32_t(blitInfo.srcOffsets[1].z - blitInfo.srcOffsets[0].z) };

    VkExtent3D dstCopyExtent =
    { uint32_t(blitInfo.dstOffsets[1].x - blitInfo.dstOffsets[0].x),
      uint32_t(blitInfo.dstOffsets[1].y - blitInfo.dstOffsets[0].y),
      uint32_t(blitInfo.dstOffsets[1].z - blitInfo.dstOffsets[0].z) };

    // Copies would only work if the extents match. (ie. no stretching)
    bool stretch = srcCopyExtent != dstCopyExtent;
    fastPath &= !stretch;

    if (!fastPath || needsResolve) {
      // Compressed destination formats are forbidden for blits.
      if (dstFormatInfo->flags.test(DxvkFormatFlag::BlockCompressed))
        return D3DERR_INVALIDCALL;
    }

    // Emits a resolve of the source region. With intermediate set, the
    // result lands at the source's own subresource and offset inside
    // resolveDst; otherwise it goes to the destination region.
    auto EmitResolveCS = [&](const Rc& resolveDst, bool intermediate) {
      VkImageResolve region;
      region.srcSubresource = blitInfo.srcSubresource;
      region.srcOffset      = blitInfo.srcOffsets[0];
      region.dstSubresource = intermediate ? blitInfo.srcSubresource : blitInfo.dstSubresource;
      region.dstOffset      = intermediate ? blitInfo.srcOffsets[0]  : blitInfo.dstOffsets[0];
      region.extent         = srcCopyExtent;

      EmitCs([
        cDstImage = resolveDst,
        cSrcImage = srcImage,
        cRegion   = region
      ] (DxvkContext* ctx) {
        // Only a combined depth+stencil aspect takes the depth-stencil resolve.
        if (cRegion.srcSubresource.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
          ctx->resolveImage(
            cDstImage, cSrcImage, cRegion,
            VK_FORMAT_UNDEFINED);
        }
        else {
          ctx->resolveDepthStencilImage(
            cDstImage, cSrcImage, cRegion,
            VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
            VK_RESOLVE_MODE_AVERAGE_BIT_KHR);
        }
      });
    };

    if (fastPath) {
      if (needsResolve) {
        // Resolve straight into the destination.
        EmitResolveCS(dstImage, false);
      } else {
        EmitCs([
          cDstImage  = dstImage,
          cSrcImage  = srcImage,
          cDstLayers = blitInfo.dstSubresource,
          cSrcLayers = blitInfo.srcSubresource,
          cDstOffset = blitInfo.dstOffsets[0],
          cSrcOffset = blitInfo.srcOffsets[0],
          cExtent    = srcCopyExtent
        ] (DxvkContext* ctx) {
          ctx->copyImage(
            cDstImage, cDstLayers, cDstOffset,
            cSrcImage, cSrcLayers, cSrcOffset,
            cExtent);
        });
      }
    }
    else {
      if (needsResolve) {
        // Resolve into the source texture's resolve image
        // first, then blit from that image instead.
        auto resolveSrc = srcTextureInfo->GetResolveImage();

        EmitResolveCS(resolveSrc, true);
        srcImage = resolveSrc;
      }

      EmitCs([
        cDstImage = dstImage,
        cDstMap   = dstTextureInfo->GetMapping().Swizzle,
        cSrcImage = srcImage,
        cSrcMap   = srcTextureInfo->GetMapping().Swizzle,
        cBlitInfo = blitInfo,
        cFilter   = stretch ? DecodeFilter(Filter) : VK_FILTER_NEAREST
      ] (DxvkContext* ctx) {
        ctx->blitImage(
          cDstImage,
          cDstMap,
          cSrcImage,
          cSrcMap,
          cBlitInfo,
          cFilter);
      });
    }

    dstTextureInfo->SetWrittenByGPU(dst->GetSubresource(), true);

    if (dstTextureInfo->IsAutomaticMip())
      MarkTextureMipsDirty(dstTextureInfo);

    return D3D_OK;
  }


  // Fills a D3DPOOL_DEFAULT surface, or a rect of it, with a color.
  //
  // Returns D3DERR_INVALIDCALL for a null surface or a surface in any
  // other pool.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ColorFill(
          IDirect3DSurface9* pSurface,
    const RECT*              pRect,
          D3DCOLOR           Color) {
    D3D9DeviceLock lock = LockDevice();

    D3D9Surface* dst = static_cast(pSurface);

    if (unlikely(dst == nullptr))
      return D3DERR_INVALIDCALL;

    D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture();

    if (unlikely(dstTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT))
      return D3DERR_INVALIDCALL;

    VkExtent3D mipExtent = dstTextureInfo->GetExtentMip(dst->GetSubresource());

    VkOffset3D offset = VkOffset3D{ 0u, 0u, 0u };
    VkExtent3D extent = mipExtent;

    // A rect that covers the whole level still counts as a full clear.
    bool isFullExtent = true;
    if (pRect != nullptr) {
      ConvertRect(*pRect, offset, extent);

      isFullExtent = offset == VkOffset3D{ 0u, 0u, 0u }
                  && extent == mipExtent;
    }

    Rc rtView = dst->GetRenderTargetView(false);

    VkClearValue clearValue;
    DecodeD3DCOLOR(Color, clearValue.color.float32);

    // Fast path for games that may use this as an
    // alternative to Clear on render targets.
if (isFullExtent && rtView != nullptr) {
      EmitCs([
        cImageView  = rtView,
        cClearValue = clearValue
      ] (DxvkContext* ctx) {
        ctx->clearRenderTarget(
          cImageView,
          VK_IMAGE_ASPECT_COLOR_BIT,
          cClearValue);
      });
    } else {
      // Without a render target view there is nothing to clear through.
      // Every format except NULL_FORMAT is reported as an error, but the
      // call still returns D3D_OK.
      if (unlikely(rtView == nullptr)) {
        const D3D9Format format = dstTextureInfo->Desc()->Format;
        if (format != D3D9Format::NULL_FORMAT)
          Logger::err(str::format("D3D9DeviceEx::ColorFill: Unsupported format ", format));

        return D3D_OK;
      }

      // Partial fill: clear only the requested region of the view.
      EmitCs([
        cImageView  = rtView,
        cOffset     = offset,
        cExtent     = extent,
        cClearValue = clearValue
      ] (DxvkContext* ctx) {
        ctx->clearImageView(
          cImageView,
          cOffset, cExtent,
          VK_IMAGE_ASPECT_COLOR_BIT,
          cClearValue);
      });
    }

    dstTextureInfo->SetWrittenByGPU(dst->GetSubresource(), true);

    if (dstTextureInfo->IsAutomaticMip())
      MarkTextureMipsDirty(dstTextureInfo);

    return D3D_OK;
  }


  // Forwards to CreateOffscreenPlainSurfaceEx with 0 as the trailing argument.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateOffscreenPlainSurface(
          UINT Width,
          UINT Height,
          D3DFORMAT Format,
          D3DPOOL Pool,
          IDirect3DSurface9** ppSurface,
          HANDLE* pSharedHandle) {
    return CreateOffscreenPlainSurfaceEx(
      Width,     Height,
      Format,    Pool,
      ppSurface, pSharedHandle,
      0);
  }


  // Binds pRenderTarget to slot RenderTargetIndex.
  //
  // Returns D3DERR_INVALIDCALL when the index is out of range, when slot 0
  // would be set to null, or when the surface lacks D3DUSAGE_RENDERTARGET.
  // Binding slot 0 also resets the viewport and scissor rect to the full
  // surface size, even if the same surface is already bound.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetRenderTarget(
          DWORD              RenderTargetIndex,
          IDirect3DSurface9* pRenderTarget) {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(RenderTargetIndex >= caps::MaxSimultaneousRenderTargets
             || (pRenderTarget == nullptr && RenderTargetIndex == 0)))
      return D3DERR_INVALIDCALL;

    // NOTE(review): the target type of this cast appears to have been lost
    // in this copy of the file; confirm against the upstream source.
    D3D9Surface* rt = static_cast(pRenderTarget);
    D3D9CommonTexture* texInfo = rt != nullptr
      ? rt->GetCommonTexture()
      : nullptr;

    if (unlikely(rt != nullptr && !(texInfo->Desc()->Usage & D3DUSAGE_RENDERTARGET)))
      return D3DERR_INVALIDCALL;

    if (RenderTargetIndex == 0) {
      // rt cannot be null here: a null target for slot 0 was rejected above.
      auto rtSize = rt->GetSurfaceExtent();

      D3DVIEWPORT9 viewport;
      viewport.X       = 0;
      viewport.Y       = 0;
      viewport.Width   = rtSize.width;
      viewport.Height  = rtSize.height;
      viewport.MinZ    = 0.0f;
      viewport.MaxZ    = 1.0f;

      RECT scissorRect;
      scissorRect.left    = 0;
      scissorRect.top     = 0;
      scissorRect.right   = rtSize.width;
      scissorRect.bottom  = rtSize.height;

      if (m_state.viewport != viewport) {
        m_flags.set(D3D9DeviceFlag::DirtyFFViewport);
        m_flags.set(D3D9DeviceFlag::DirtyPointScale);
        m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
        m_state.viewport = viewport;
      }

      if (m_state.scissorRect != scissorRect) {
        m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
        m_state.scissorRect = scissorRect;
      }
    }

    // Nothing else to update if this surface is already bound to the slot.
    if (m_state.renderTargets[RenderTargetIndex] == rt)
      return D3D_OK;

    // Do a strong flush if the first render target is changed.
    FlushImplicit(RenderTargetIndex == 0 ?
TRUE : FALSE); m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); m_state.renderTargets[RenderTargetIndex] = rt; UpdateBoundRTs(RenderTargetIndex); UpdateActiveRTs(RenderTargetIndex); uint32_t originalAlphaSwizzleRTs = m_alphaSwizzleRTs; m_alphaSwizzleRTs &= ~(1 << RenderTargetIndex); if (rt != nullptr) { if (texInfo->GetMapping().Swizzle.a == VK_COMPONENT_SWIZZLE_ONE) m_alphaSwizzleRTs |= 1 << RenderTargetIndex; if (texInfo->IsAutomaticMip()) texInfo->SetNeedsMipGen(true); texInfo->SetWrittenByGPU(rt->GetSubresource(), true); } if (originalAlphaSwizzleRTs != m_alphaSwizzleRTs) m_flags.set(D3D9DeviceFlag::DirtyBlendState); if (RenderTargetIndex == 0) { bool validSampleMask = texInfo->Desc()->MultiSample > D3DMULTISAMPLE_NONMASKABLE; if (validSampleMask != m_flags.test(D3D9DeviceFlag::ValidSampleMask)) { m_flags.clr(D3D9DeviceFlag::ValidSampleMask); if (validSampleMask) m_flags.set(D3D9DeviceFlag::ValidSampleMask); m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState); } } return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderTarget( DWORD RenderTargetIndex, IDirect3DSurface9** ppRenderTarget) { D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppRenderTarget); if (unlikely(ppRenderTarget == nullptr || RenderTargetIndex > caps::MaxSimultaneousRenderTargets)) return D3DERR_INVALIDCALL; if (m_state.renderTargets[RenderTargetIndex] == nullptr) return D3DERR_NOTFOUND; *ppRenderTarget = m_state.renderTargets[RenderTargetIndex].ref(); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDepthStencilSurface(IDirect3DSurface9* pNewZStencil) { D3D9DeviceLock lock = LockDevice(); D3D9Surface* ds = static_cast(pNewZStencil); if (unlikely(ds && !(ds->GetCommonTexture()->Desc()->Usage & D3DUSAGE_DEPTHSTENCIL))) return D3DERR_INVALIDCALL; if (m_state.depthStencil == ds) return D3D_OK; FlushImplicit(FALSE); m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); if (ds != nullptr) { float rValue = GetDepthBufferRValue(ds->GetCommonTexture()->GetFormatMapping().FormatColor); if 
(m_depthBiasScale != rValue) {
        m_depthBiasScale = rValue;
        m_flags.set(D3D9DeviceFlag::DirtyDepthBias);
      }
    }

    m_state.depthStencil = ds;

    UpdateActiveHazardsDS(UINT32_MAX);

    return D3D_OK;
  }


  // Returns a new reference to the bound depth-stencil surface.
  //
  // Returns D3DERR_INVALIDCALL when ppZStencilSurface is null and
  // D3DERR_NOTFOUND when no depth-stencil surface is bound.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDepthStencilSurface(IDirect3DSurface9** ppZStencilSurface) {
    D3D9DeviceLock lock = LockDevice();

    InitReturnPtr(ppZStencilSurface);

    if (unlikely(ppZStencilSurface == nullptr))
      return D3DERR_INVALIDCALL;

    if (m_state.depthStencil == nullptr)
      return D3DERR_NOTFOUND;

    *ppZStencilSurface = m_state.depthStencil.ref();

    return D3D_OK;
  }

  // The Begin/EndScene functions do very little: they only track the
  // InScene flag, and EndScene additionally issues an implicit flush.
  // Some games don't even call them.

  // Marks the device as being inside a scene.
  // Returns D3DERR_INVALIDCALL if a scene is already open.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::BeginScene() {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(m_flags.test(D3D9DeviceFlag::InScene)))
      return D3DERR_INVALIDCALL;

    m_flags.set(D3D9DeviceFlag::InScene);

    return D3D_OK;
  }


  // Closes the current scene and flushes.
  // Returns D3DERR_INVALIDCALL if no scene is open.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndScene() {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(!m_flags.test(D3D9DeviceFlag::InScene)))
      return D3DERR_INVALIDCALL;

    FlushImplicit(true);

    m_flags.clr(D3D9DeviceFlag::InScene);

    return D3D_OK;
  }


  // Clears the bound render targets and/or the depth-stencil surface.
  //
  // The clear region is the current viewport, clipped to the scissor rect
  // when D3DRS_SCISSORTESTENABLE is set, and optionally restricted further
  // to the pRects supplied by the application. Flags selects what is
  // cleared (D3DCLEAR_TARGET / D3DCLEAR_ZBUFFER / D3DCLEAR_STENCIL); Color,
  // Z and Stencil supply the clear values.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Clear(
          DWORD    Count,
    const D3DRECT* pRects,
          DWORD    Flags,
          D3DCOLOR Color,
          float    Z,
          DWORD    Stencil) {
    // A rect array with a zero count means there is nothing to clear.
    if (unlikely(!Count && pRects))
      return D3D_OK;

    D3D9DeviceLock lock = LockDevice();

    const auto& vp = m_state.viewport;
    const auto& sc = m_state.scissorRect;

    bool srgb      = m_state.renderStates[D3DRS_SRGBWRITEENABLE];
    bool scissor   = m_state.renderStates[D3DRS_SCISSORTESTENABLE];

    VkOffset3D offset = { int32_t(vp.X),    int32_t(vp.Y),      0  };
    VkExtent3D extent = {         vp.Width,         vp.Height,  1u };

    // NOTE(review): the std::max / std::min calls below appear to have lost
    // their explicit template arguments in this copy of the file; confirm
    // against the upstream source.
    if (scissor) {
      offset.x = std::max (offset.x, sc.left);
      offset.y = std::max (offset.y, sc.top);

      extent.width  = std::min(extent.width,  sc.right  - offset.x);
      extent.height = std::min(extent.height, sc.bottom - offset.y);
    }

    // This becomes pretty unreadable in one singular if statement...
    if (Count) {
      // If pRects is null, or our first rect encompasses the viewport:
      if (!pRects)
        Count = 0;
      else if (pRects[0].x1 <= offset.x                         && pRects[0].y1 <= offset.y
            && pRects[0].x2 >= offset.x + int32_t(extent.width) && pRects[0].y2 >= offset.y + int32_t(extent.height))
        Count = 0;
    }

    // Here, Count of 0 will denote whether or not to care about user rects.
    VkClearValue clearValueDepth;
    clearValueDepth.depthStencil.depth   = Z;
    clearValueDepth.depthStencil.stencil = Stencil;

    VkClearValue clearValueColor;
    DecodeD3DCOLOR(Color, clearValueColor.color.float32);

    // Only the aspects that were requested AND that the bound depth-stencil
    // format reports in its aspect mask get cleared.
    VkImageAspectFlags depthAspectMask = 0;
    if (m_state.depthStencil != nullptr) {
      if (Flags & D3DCLEAR_ZBUFFER)
        depthAspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;

      if (Flags & D3DCLEAR_STENCIL)
        depthAspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;

      depthAspectMask &= imageFormatInfo(m_state.depthStencil->GetCommonTexture()->GetFormatMapping().FormatColor)->aspectMask;
    }

    // Emits a single clear of imageView: a whole-target clear when fullClear
    // is set, otherwise a clear of the given offset/extent region.
    auto ClearImageView = [this](
      bool               fullClear,
      VkOffset3D         offset,
      VkExtent3D         extent,
      const Rc&          imageView,
      VkImageAspectFlags aspectMask,
      VkClearValue       clearValue) {
      if (fullClear) {
        EmitCs([
          cClearValue = clearValue,
          cAspectMask = aspectMask,
          cImageView  = imageView
        ] (DxvkContext* ctx) {
          ctx->clearRenderTarget(
            cImageView,
            cAspectMask,
            cClearValue);
        });
      }
      else {
        EmitCs([
          cClearValue = clearValue,
          cAspectMask = aspectMask,
          cImageView  = imageView,
          cOffset     = offset,
          cExtent     = extent
        ] (DxvkContext* ctx) {
          ctx->clearImageView(
            cImageView,
            cOffset, cExtent,
            cAspectMask,
            cClearValue);
        });
      }
    };

    // Clears one region on the depth-stencil surface and on every bound
    // render target, as selected by Flags.
    auto ClearViewRect = [&](
      bool               fullClear,
      VkOffset3D         offset,
      VkExtent3D         extent) {
      // Clear depth if we need to.
      if (depthAspectMask != 0)
        ClearImageView(fullClear, offset, extent, m_state.depthStencil->GetDepthStencilView(), depthAspectMask, clearValueDepth);

      // Clear render targets if we need to.
      if (Flags & D3DCLEAR_TARGET) {
        for (uint32_t rt : bit::BitMask(m_boundRTs)) {
          const auto& rts = m_state.renderTargets[rt];
          const auto& rtv = rts->GetRenderTargetView(srgb);

          if (likely(rtv != nullptr)) {
            ClearImageView(fullClear, offset, extent, rtv, VK_IMAGE_ASPECT_COLOR_BIT, clearValueColor);

            D3D9CommonTexture* dstTexture = rts->GetCommonTexture();

            if (dstTexture->IsAutomaticMip())
              MarkTextureMipsDirty(dstTexture);
          }
        }
      }
    };

    // A Hat in Time and other UE3 games only gets partial clears here
    // because of an oversized rt height due to their weird alignment...
    // This works around that.
    uint32_t alignment = m_d3d9Options.lenientClear ? 8 : 1;

    // Clamp the clear region to render target 0 before comparing sizes.
    auto rtSize = m_state.renderTargets[0]->GetSurfaceExtent();

    extent.width  = std::min(rtSize.width  - offset.x, extent.width);
    extent.height = std::min(rtSize.height - offset.y, extent.height);

    bool extentMatches = align(extent.width,  alignment) == align(rtSize.width,  alignment)
                      && align(extent.height, alignment) == align(rtSize.height, alignment);

    // The depth-stencil size only matters when it is actually being cleared.
    bool dsMatches = true;
    if (depthAspectMask) {
      auto dsSize = m_state.depthStencil->GetSurfaceExtent();
      dsMatches = align(extent.width,  alignment) == align(dsSize.width,  alignment)
               && align(extent.height, alignment) == align(dsSize.height, alignment);
    }

    bool rtSizeMatchesClearSize = offset.x == 0 && offset.y == 0 && extentMatches && dsMatches;

    if (likely(!Count && rtSizeMatchesClearSize)) {
      // Fast path w/ ClearRenderTarget for when
      // our viewport and stencils match the RT size
      ClearViewRect(true, offset, extent);
    }
    else if (!Count) {
      // Clear our viewport & scissor minified region in this rendertarget.
      ClearViewRect(false, offset, extent);
    }
    else {
      // Clear the application provided rects.
for (uint32_t i = 0; i < Count; i++) { VkOffset3D rectOffset = { std::max(pRects[i].x1, offset.x), std::max(pRects[i].y1, offset.y), 0 }; VkExtent3D rectExtent = { std::min(pRects[i].x2, offset.x + extent.width) - rectOffset.x, std::min(pRects[i].y2, offset.y + extent.height) - rectOffset.y, 1u }; ClearViewRect(false, rectOffset, rectExtent); } } return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX* pMatrix) { return SetStateTransform(GetTransformIndex(State), pMatrix); } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTransform(D3DTRANSFORMSTATETYPE State, D3DMATRIX* pMatrix) { D3D9DeviceLock lock = LockDevice(); if (unlikely(pMatrix == nullptr)) return D3DERR_INVALIDCALL; *pMatrix = bit::cast(m_state.transforms[GetTransformIndex(State)]); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::MultiplyTransform(D3DTRANSFORMSTATETYPE TransformState, const D3DMATRIX* pMatrix) { D3D9DeviceLock lock = LockDevice(); if (unlikely(ShouldRecord())) return m_recorder->MultiplyStateTransform(TransformState, pMatrix); uint32_t idx = GetTransformIndex(TransformState); m_state.transforms[idx] = m_state.transforms[idx] * ConvertMatrix(pMatrix); m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); if (idx == GetTransformIndex(D3DTS_VIEW) || idx >= GetTransformIndex(D3DTS_WORLD)) m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetViewport(const D3DVIEWPORT9* pViewport) { D3D9DeviceLock lock = LockDevice(); if (unlikely(ShouldRecord())) return m_recorder->SetViewport(pViewport); if (m_state.viewport == *pViewport) return D3D_OK; m_state.viewport = *pViewport; m_flags.set(D3D9DeviceFlag::DirtyViewportScissor); m_flags.set(D3D9DeviceFlag::DirtyFFViewport); m_flags.set(D3D9DeviceFlag::DirtyPointScale); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetViewport(D3DVIEWPORT9* pViewport) { D3D9DeviceLock lock = LockDevice(); if (pViewport == nullptr) return 
D3DERR_INVALIDCALL; *pViewport = m_state.viewport; return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetMaterial(const D3DMATERIAL9* pMaterial) { D3D9DeviceLock lock = LockDevice(); if (unlikely(pMaterial == nullptr)) return D3DERR_INVALIDCALL; if (unlikely(ShouldRecord())) return m_recorder->SetMaterial(pMaterial); m_state.material = *pMaterial; m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetMaterial(D3DMATERIAL9* pMaterial) { D3D9DeviceLock lock = LockDevice(); if (unlikely(pMaterial == nullptr)) return D3DERR_INVALIDCALL; *pMaterial = m_state.material; return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetLight(DWORD Index, const D3DLIGHT9* pLight) { D3D9DeviceLock lock = LockDevice(); if (unlikely(pLight == nullptr)) return D3DERR_INVALIDCALL; if (unlikely(ShouldRecord())) { Logger::warn("D3D9DeviceEx::SetLight: State block not implemented."); return D3D_OK; } if (Index >= m_state.lights.size()) m_state.lights.resize(Index + 1); m_state.lights[Index] = *pLight; if (m_state.IsLightEnabled(Index)) m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLight(DWORD Index, D3DLIGHT9* pLight) { D3D9DeviceLock lock = LockDevice(); if (unlikely(pLight == nullptr)) return D3DERR_INVALIDCALL; if (unlikely(Index >= m_state.lights.size() || !m_state.lights[Index])) return D3DERR_INVALIDCALL; *pLight = m_state.lights[Index].value(); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::LightEnable(DWORD Index, BOOL Enable) { D3D9DeviceLock lock = LockDevice(); if (unlikely(Index >= m_state.lights.size())) m_state.lights.resize(Index + 1); if (unlikely(!m_state.lights[Index])) m_state.lights[Index] = DefaultLight; if (m_state.IsLightEnabled(Index) == !!Enable) return D3D_OK; uint32_t searchIndex = UINT32_MAX; uint32_t setIndex = Index; if (!Enable) std::swap(searchIndex, setIndex); for (auto& idx : m_state.enabledLightIndices) { if (idx == 
searchIndex) { idx = setIndex; m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); break; } } return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLightEnable(DWORD Index, BOOL* pEnable) { D3D9DeviceLock lock = LockDevice(); if (unlikely(pEnable == nullptr)) return D3DERR_INVALIDCALL; if (unlikely(Index >= m_state.lights.size() || !m_state.lights[Index])) return D3DERR_INVALIDCALL; *pEnable = m_state.IsLightEnabled(Index) ? 128 : 0; // Weird quirk but OK. return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetClipPlane(DWORD Index, const float* pPlane) { D3D9DeviceLock lock = LockDevice(); if (unlikely(Index >= caps::MaxClipPlanes || !pPlane)) return D3DERR_INVALIDCALL; if (unlikely(ShouldRecord())) return m_recorder->SetClipPlane(Index, pPlane); bool dirty = false; for (uint32_t i = 0; i < 4; i++) { dirty |= m_state.clipPlanes[Index].coeff[i] != pPlane[i]; m_state.clipPlanes[Index].coeff[i] = pPlane[i]; } bool enabled = m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1u << Index); dirty &= enabled; if (dirty) m_flags.set(D3D9DeviceFlag::DirtyClipPlanes); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetClipPlane(DWORD Index, float* pPlane) { D3D9DeviceLock lock = LockDevice(); if (unlikely(Index >= caps::MaxClipPlanes || !pPlane)) return D3DERR_INVALIDCALL; for (uint32_t i = 0; i < 4; i++) pPlane[i] = m_state.clipPlanes[Index].coeff[i]; return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) { D3D9DeviceLock lock = LockDevice(); // D3D9 only allows reading for values 0 and 7-255 so we don't need to do anything but return OK if (unlikely(State > 255 || (State < D3DRS_ZENABLE && State != 0))) { return D3D_OK; } if (unlikely(ShouldRecord())) return m_recorder->SetRenderState(State, Value); auto& states = m_state.renderStates; bool changed = states[State] != Value; if (likely(changed)) { const bool oldClipPlaneEnabled = IsClipPlaneEnabled(); const 
bool oldDepthBiasEnabled = IsDepthBiasEnabled();

      const bool oldATOC = IsAlphaToCoverageEnabled();
      const bool oldNVDB = states[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB);
      const bool oldAlphaTest = IsAlphaTestEnabled();

      // The "old" values above are sampled before the new value is stored so
      // the cases below can detect transitions.
      states[State] = Value;

      // AMD's driver hack for ATOC and RESZ
      if (unlikely(State == D3DRS_POINTSIZE)) {
        // ATOC
        constexpr uint32_t AlphaToCoverageEnable  = uint32_t(D3D9Format::A2M1);
        constexpr uint32_t AlphaToCoverageDisable = uint32_t(D3D9Format::A2M0);

        if (Value == AlphaToCoverageEnable
         || Value == AlphaToCoverageDisable) {
          m_amdATOC = Value == AlphaToCoverageEnable;

          bool newATOC = IsAlphaToCoverageEnabled();
          bool newAlphaTest = IsAlphaTestEnabled();

          if (oldATOC != newATOC)
            m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);

          if (oldAlphaTest != newAlphaTest)
            m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);

          // The magic value is consumed here; the regular POINTSIZE
          // handling in the switch below is skipped.
          return D3D_OK;
        }

        // RESZ
        constexpr uint32_t RESZ = 0x7fa05000;
        if (Value == RESZ) {
          ResolveZ();
          return D3D_OK;
        }
      }

      // NV's driver hack for ATOC.
      if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
        constexpr uint32_t AlphaToCoverageEnable  = uint32_t(D3D9Format::ATOC);
        constexpr uint32_t AlphaToCoverageDisable = 0;

        if (Value == AlphaToCoverageEnable
         || Value == AlphaToCoverageDisable) {
          m_nvATOC = Value == AlphaToCoverageEnable;

          bool newATOC = IsAlphaToCoverageEnabled();
          bool newAlphaTest = IsAlphaTestEnabled();

          if (oldATOC != newATOC)
            m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);

          if (oldAlphaTest != newAlphaTest)
            m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);

          return D3D_OK;
        }

        if (unlikely(Value == uint32_t(D3D9Format::COPM))) {
          // UE3 calls this MinimalNVIDIADriverShaderOptimization
          Logger::info("D3D9DeviceEx::SetRenderState: MinimalNVIDIADriverShaderOptimization is unsupported");
          return D3D_OK;
        }
      }

      // Map the changed render state onto the dirty flags / rebinds it needs.
      switch (State) {
        case D3DRS_SEPARATEALPHABLENDENABLE:
        case D3DRS_ALPHABLENDENABLE:
        case D3DRS_BLENDOP:
        case D3DRS_BLENDOPALPHA:
        case D3DRS_DESTBLEND:
        case D3DRS_DESTBLENDALPHA:
        case D3DRS_SRCBLEND:
        case D3DRS_SRCBLENDALPHA:
          m_flags.set(D3D9DeviceFlag::DirtyBlendState);
          break;

        // The write mask of each render target also affects which targets
        // are considered active.
        case D3DRS_COLORWRITEENABLE:
          UpdateActiveRTs(0);
          m_flags.set(D3D9DeviceFlag::DirtyBlendState);
          break;
        case D3DRS_COLORWRITEENABLE1:
          UpdateActiveRTs(1);
          m_flags.set(D3D9DeviceFlag::DirtyBlendState);
          break;
        case D3DRS_COLORWRITEENABLE2:
          UpdateActiveRTs(2);
          m_flags.set(D3D9DeviceFlag::DirtyBlendState);
          break;
        case D3DRS_COLORWRITEENABLE3:
          UpdateActiveRTs(3);
          m_flags.set(D3D9DeviceFlag::DirtyBlendState);
          break;

        case D3DRS_ALPHATESTENABLE: {
          bool newATOC = IsAlphaToCoverageEnabled();
          bool newAlphaTest = IsAlphaTestEnabled();

          if (oldATOC != newATOC)
            m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);

          if (oldAlphaTest != newAlphaTest)
            m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);

          break;
        }

        case D3DRS_ALPHAFUNC:
          m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);
          break;

        case D3DRS_BLENDFACTOR:
          BindBlendFactor();
          break;

        case D3DRS_MULTISAMPLEMASK:
          // The mask only matters while the ValidSampleMask flag is set.
          if (m_flags.test(D3D9DeviceFlag::ValidSampleMask))
            m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
          break;

        case D3DRS_ZWRITEENABLE:
          if (m_activeHazardsDS != 0)
            m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);

          m_flags.set(D3D9DeviceFlag::DirtyDepthStencilState);
          break;

        case D3DRS_ZENABLE:
        case D3DRS_ZFUNC:
        case D3DRS_TWOSIDEDSTENCILMODE:
        case D3DRS_STENCILENABLE:
        case D3DRS_STENCILFAIL:
        case D3DRS_STENCILZFAIL:
        case D3DRS_STENCILPASS:
        case D3DRS_STENCILFUNC:
        case D3DRS_CCW_STENCILFAIL:
        case D3DRS_CCW_STENCILZFAIL:
        case D3DRS_CCW_STENCILPASS:
        case D3DRS_CCW_STENCILFUNC:
        case D3DRS_STENCILMASK:
        case D3DRS_STENCILWRITEMASK:
          m_flags.set(D3D9DeviceFlag::DirtyDepthStencilState);
          break;

        case D3DRS_STENCILREF:
          BindDepthStencilRefrence();
          break;

        case D3DRS_SCISSORTESTENABLE:
          m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
          break;

        case D3DRS_SRGBWRITEENABLE:
          m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
          break;

        case D3DRS_DEPTHBIAS:
        case D3DRS_SLOPESCALEDEPTHBIAS: {
          // Toggling depth bias on/off dirties the rasterizer state; the
          // bias values themselves are only re-sent while bias is enabled.
          const bool depthBiasEnabled = IsDepthBiasEnabled();

          if (depthBiasEnabled != oldDepthBiasEnabled)
            m_flags.set(D3D9DeviceFlag::DirtyRasterizerState);

          if (depthBiasEnabled)
            m_flags.set(D3D9DeviceFlag::DirtyDepthBias);

          break;
        }
        case D3DRS_CULLMODE:
        case D3DRS_FILLMODE:
          m_flags.set(D3D9DeviceFlag::DirtyRasterizerState);
          break;

        case D3DRS_CLIPPLANEENABLE: {
          const bool clipPlaneEnabled = IsClipPlaneEnabled();

          if (clipPlaneEnabled != oldClipPlaneEnabled)
            m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);

          m_flags.set(D3D9DeviceFlag::DirtyClipPlanes);
          break;
        }

        case D3DRS_ALPHAREF:
          UpdatePushConstant();
          break;

        case D3DRS_TEXTUREFACTOR:
          m_flags.set(D3D9DeviceFlag::DirtyFFPixelData);
          break;

        case D3DRS_DIFFUSEMATERIALSOURCE:
        case D3DRS_AMBIENTMATERIALSOURCE:
        case D3DRS_SPECULARMATERIALSOURCE:
        case D3DRS_EMISSIVEMATERIALSOURCE:
        case D3DRS_COLORVERTEX:
        case D3DRS_LIGHTING:
        case D3DRS_NORMALIZENORMALS:
        case D3DRS_LOCALVIEWER:
          m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
          break;

        case D3DRS_AMBIENT:
          m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
          break;

        case D3DRS_SPECULARENABLE:
          m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
          break;

        case D3DRS_FOGENABLE:
        case D3DRS_FOGVERTEXMODE:
        case D3DRS_FOGTABLEMODE:
          m_flags.set(D3D9DeviceFlag::DirtyFogState);
          break;

        case D3DRS_RANGEFOGENABLE:
          m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
          break;

        case D3DRS_FOGCOLOR:
          m_flags.set(D3D9DeviceFlag::DirtyFogColor);
          break;

        case D3DRS_FOGSTART:
          m_flags.set(D3D9DeviceFlag::DirtyFogScale);
          break;

        case D3DRS_FOGEND:
          m_flags.set(D3D9DeviceFlag::DirtyFogScale);
          m_flags.set(D3D9DeviceFlag::DirtyFogEnd);
          break;

        case D3DRS_FOGDENSITY:
          m_flags.set(D3D9DeviceFlag::DirtyFogDensity);
          break;

        case D3DRS_POINTSIZE:
          UpdatePushConstant();
          break;

        case D3DRS_POINTSIZE_MIN:
          UpdatePushConstant();
          break;

        case D3DRS_POINTSIZE_MAX:
          UpdatePushConstant();
          break;

        case D3DRS_POINTSCALE_A:
        case D3DRS_POINTSCALE_B:
        case D3DRS_POINTSCALE_C:
          m_flags.set(D3D9DeviceFlag::DirtyPointScale);
          break;

        case D3DRS_POINTSCALEENABLE:
        case D3DRS_POINTSPRITEENABLE:
          // Nothing to do here!
          // This is handled in UpdatePointMode.
break;

        case D3DRS_SHADEMODE:
          // Rebind the current pixel shader with the permutation returned by
          // GetPixelShaderPermutation().
          if (m_state.pixelShader != nullptr) {
            BindShader(
              GetCommonShader(m_state.pixelShader),
              GetPixelShaderPermutation());
          }

          m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
          break;

        case D3DRS_TWEENFACTOR:
          m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
          break;

        case D3DRS_VERTEXBLEND:
          m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
          break;

        case D3DRS_INDEXEDVERTEXBLENDENABLE:
          if (CanSWVP() && Value)
            m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend);

          m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
          break;

        case D3DRS_ADAPTIVETESS_X:
        case D3DRS_ADAPTIVETESS_Z:
        case D3DRS_ADAPTIVETESS_W:
          if (states[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB) || oldNVDB) {
            m_flags.set(D3D9DeviceFlag::DirtyDepthBounds);
            break;
          }
          // When NVDB is involved neither before nor after the change, control
          // falls through to the default case and the state is reported as
          // unhandled.

        default:
          // Warn only once per render state. State was range-checked to be
          // <= 255 at the top of the function, so it indexes this table safely.
          static bool s_errorShown[256];

          if (!std::exchange(s_errorShown[State], true))
            Logger::warn(str::format("D3D9DeviceEx::SetRenderState: Unhandled render state ", State));
          break;
      }
    }

    return D3D_OK;
  }


  // Reads back the stored value of a render state.
  //
  // Returns D3DERR_INVALIDCALL when pValue is null or State is outside the
  // accepted range (0 and D3DRS_ZENABLE..255). States below D3DRS_ZENABLE or
  // above D3DRS_BLENDOPALPHA read back as 0.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderState(D3DRENDERSTATETYPE State, DWORD* pValue) {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(pValue == nullptr))
      return D3DERR_INVALIDCALL;

    if (unlikely(State > 255 || (State < D3DRS_ZENABLE && State != 0))) {
      return D3DERR_INVALIDCALL;
    }

    if (State < D3DRS_ZENABLE || State > D3DRS_BLENDOPALPHA)
      *pValue = 0;
    else
      *pValue = m_state.renderStates[State];

    return D3D_OK;
  }


  // Creates a state block of the requested type and returns a reference to
  // it through ppSB. A DxvkError thrown during construction is logged and
  // reported as D3DERR_INVALIDCALL.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateStateBlock(
          D3DSTATEBLOCKTYPE      Type,
          IDirect3DStateBlock9** ppSB) {
    D3D9DeviceLock lock = LockDevice();

    InitReturnPtr(ppSB);

    if (unlikely(ppSB == nullptr))
      return D3DERR_INVALIDCALL;

    try {
      // NOTE(review): the template argument of Com appears to have been lost
      // in this copy of the file; confirm against the upstream source.
      const Com sb = new D3D9StateBlock(this, ConvertStateBlockType(Type));
      *ppSB = sb.ref();
      return D3D_OK;
    }
    catch (const DxvkError & e) {
      Logger::err(e.message());
      return D3DERR_INVALIDCALL;
    }
  }


  // Starts recording state changes into a new state block.
  // Returns D3DERR_INVALIDCALL if a recording is already in progress.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::BeginStateBlock() {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(m_recorder != nullptr))
      return D3DERR_INVALIDCALL;

    m_recorder = new D3D9StateBlock(this,
D3D9StateBlockType::None);

    return D3D_OK;
  }


  // Finishes the active recording and hands the recorded state block to the
  // caller. Returns D3DERR_INVALIDCALL when ppSB is null or no recording is
  // in progress.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndStateBlock(IDirect3DStateBlock9** ppSB) {
    D3D9DeviceLock lock = LockDevice();

    InitReturnPtr(ppSB);

    if (unlikely(ppSB == nullptr || m_recorder == nullptr))
      return D3DERR_INVALIDCALL;

    *ppSB = m_recorder.ref();
    m_recorder = nullptr;

    return D3D_OK;
  }


  // Stub: logs a warning and reports success without using pClipStatus.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetClipStatus(const D3DCLIPSTATUS9* pClipStatus) {
    Logger::warn("D3D9DeviceEx::SetClipStatus: Stub");
    return D3D_OK;
  }


  // Stub: logs a warning and reports success without writing pClipStatus.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetClipStatus(D3DCLIPSTATUS9* pClipStatus) {
    Logger::warn("D3D9DeviceEx::GetClipStatus: Stub");
    return D3D_OK;
  }


  // Returns a new reference to the texture bound to Stage. An invalid
  // sampler index yields a null texture together with D3D_OK.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTexture(DWORD Stage, IDirect3DBaseTexture9** ppTexture) {
    D3D9DeviceLock lock = LockDevice();

    if (ppTexture == nullptr)
      return D3DERR_INVALIDCALL;

    *ppTexture = nullptr;

    if (unlikely(InvalidSampler(Stage)))
      return D3D_OK;

    DWORD stateSampler = RemapSamplerState(Stage);

    *ppTexture = ref(m_state.textures[stateSampler]);

    return D3D_OK;
  }


  // Binds pTexture to Stage. An invalid sampler index is silently accepted.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTexture(DWORD Stage, IDirect3DBaseTexture9* pTexture) {
    if (unlikely(InvalidSampler(Stage)))
      return D3D_OK;

    DWORD stateSampler = RemapSamplerState(Stage);

    return SetStateTexture(stateSampler, pTexture);
  }


  // Reads back a texture stage state value.
  //
  // *pValue is zeroed before the range checks, so it is well-defined even
  // when D3DERR_INVALIDCALL is returned for an out-of-range stage or type.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTextureStageState(
          DWORD                    Stage,
          D3DTEXTURESTAGESTATETYPE Type,
          DWORD*                   pValue) {
    // Take the device lock before touching m_state, like the other state
    // getters (GetTexture, GetSamplerState, GetRenderState) do.
    D3D9DeviceLock lock = LockDevice();

    auto dxvkType = RemapTextureStageStateType(Type);

    if (unlikely(pValue == nullptr))
      return D3DERR_INVALIDCALL;

    *pValue = 0;

    if (unlikely(Stage >= caps::TextureStageCount))
      return D3DERR_INVALIDCALL;

    if (unlikely(dxvkType >= TextureStageStateCount))
      return D3DERR_INVALIDCALL;

    *pValue = m_state.textureStages[Stage][dxvkType];

    return D3D_OK;
  }


  // Forwards to SetStateTextureStageState using the remapped state type.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTextureStageState(
          DWORD                    Stage,
          D3DTEXTURESTAGESTATETYPE Type,
          DWORD                    Value) {
    return SetStateTextureStageState(Stage, RemapTextureStageStateType(Type), Value);
  }


  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetSamplerState(
DWORD               Sampler,
          D3DSAMPLERSTATETYPE Type,
          DWORD*              pValue) {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(pValue == nullptr))
      return D3DERR_INVALIDCALL;

    *pValue = 0;

    if (unlikely(InvalidSampler(Sampler)))
      return D3D_OK;

    Sampler = RemapSamplerState(Sampler);

    *pValue = m_state.samplerStates[Sampler][Type];

    return D3D_OK;
  }


  // Sets a sampler state on the remapped sampler slot.
  // An invalid sampler index is silently accepted.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetSamplerState(
          DWORD               Sampler,
          D3DSAMPLERSTATETYPE Type,
          DWORD               Value) {
    const bool samplerIsInvalid = InvalidSampler(Sampler);

    if (unlikely(samplerIsInvalid))
      return D3D_OK;

    const uint32_t remappedSampler = RemapSamplerState(Sampler);

    return SetStateSamplerState(remappedSampler, Type, Value);
  }


  // Always succeeds; reports a single pass when the caller asks for it.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ValidateDevice(DWORD* pNumPasses) {
    if (pNumPasses)
      *pNumPasses = 1;

    return D3D_OK;
  }


  // Palette setters succeed even though we don't advertise support.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPaletteEntries(UINT PaletteNumber, const PALETTEENTRY* pEntries) {
    return D3D_OK;
  }


  // Palette getters fail: we don't advertise support for this.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPaletteEntries(UINT PaletteNumber, PALETTEENTRY* pEntries) {
    return D3DERR_INVALIDCALL;
  }


  // Succeeds even though we don't advertise palette support.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetCurrentTexturePalette(UINT PaletteNumber) {
    return D3D_OK;
  }


  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetCurrentTexturePalette(UINT *PaletteNumber) {
    // Don't advertise support for this...
return D3DERR_INVALIDCALL; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetScissorRect(const RECT* pRect) { D3D9DeviceLock lock = LockDevice(); if (unlikely(pRect == nullptr)) return D3DERR_INVALIDCALL; if (unlikely(ShouldRecord())) return m_recorder->SetScissorRect(pRect); if (m_state.scissorRect == *pRect) return D3D_OK; m_state.scissorRect = *pRect; m_flags.set(D3D9DeviceFlag::DirtyViewportScissor); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetScissorRect(RECT* pRect) { D3D9DeviceLock lock = LockDevice(); if (unlikely(pRect == nullptr)) return D3DERR_INVALIDCALL; *pRect = m_state.scissorRect; return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetSoftwareVertexProcessing(BOOL bSoftware) { auto lock = LockDevice(); if (bSoftware && !CanSWVP()) return D3DERR_INVALIDCALL; m_isSWVP = bSoftware; return D3D_OK; } BOOL STDMETHODCALLTYPE D3D9DeviceEx::GetSoftwareVertexProcessing() { auto lock = LockDevice(); return m_isSWVP; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetNPatchMode(float nSegments) { return D3D_OK; } float STDMETHODCALLTYPE D3D9DeviceEx::GetNPatchMode() { return 0.0f; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawPrimitive( D3DPRIMITIVETYPE PrimitiveType, UINT StartVertex, UINT PrimitiveCount) { D3D9DeviceLock lock = LockDevice(); if (unlikely(m_state.vertexDecl == nullptr)) return D3DERR_INVALIDCALL; if (unlikely(!PrimitiveCount)) return S_OK; PrepareDraw(PrimitiveType); EmitCs([this, cPrimType = PrimitiveType, cPrimCount = PrimitiveCount, cStartVertex = StartVertex, cInstanceCount = GetInstanceCount() ](DxvkContext* ctx) { auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount); ApplyPrimitiveType(ctx, cPrimType); ctx->draw( drawInfo.vertexCount, drawInfo.instanceCount, cStartVertex, 0); }); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawIndexedPrimitive( D3DPRIMITIVETYPE PrimitiveType, INT BaseVertexIndex, UINT MinVertexIndex, UINT NumVertices, UINT StartIndex, UINT PrimitiveCount) { D3D9DeviceLock lock = 
LockDevice();

    if (unlikely(m_state.vertexDecl == nullptr))
      return D3DERR_INVALIDCALL;

    if (unlikely(!PrimitiveCount))
      return S_OK;

    PrepareDraw(PrimitiveType);

    EmitCs([this,
      cPrimType        = PrimitiveType,
      cPrimCount       = PrimitiveCount,
      cStartIndex      = StartIndex,
      cBaseVertexIndex = BaseVertexIndex,
      cInstanceCount   = GetInstanceCount()
    ](DxvkContext* ctx) {
      auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount);

      ApplyPrimitiveType(ctx, cPrimType);

      ctx->drawIndexed(
        drawInfo.vertexCount, drawInfo.instanceCount,
        cStartIndex,
        cBaseVertexIndex, 0);
    });

    return D3D_OK;
  }


  // Draws non-indexed primitives from vertex data supplied directly by the
  // caller. The data is copied into a temporary buffer that is bound as
  // vertex stream 0 for the draw and unbound again afterwards.
  //
  // Requires a vertex declaration to be bound; a zero primitive count
  // succeeds without drawing. On return the tracked stream 0 binding is
  // reset (null buffer, zero offset and stride).
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawPrimitiveUP(
          D3DPRIMITIVETYPE PrimitiveType,
          UINT             PrimitiveCount,
    const void*            pVertexStreamZeroData,
          UINT             VertexStreamZeroStride) {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(m_state.vertexDecl == nullptr))
      return D3DERR_INVALIDCALL;

    if (unlikely(!PrimitiveCount))
      return S_OK;

    PrepareDraw(PrimitiveType);

    // Only the vertex count of this draw info is used, to size the upload.
    auto drawInfo = GenerateDrawInfo(PrimitiveType, PrimitiveCount, 0);

    const uint32_t dataSize = GetUPDataSize(drawInfo.vertexCount, VertexStreamZeroStride);
    const uint32_t bufferSize = GetUPBufferSize(drawInfo.vertexCount, VertexStreamZeroStride);

    auto upSlice = AllocTempBuffer(bufferSize);
    FillUPVertexBuffer(upSlice.mapPtr, pVertexStreamZeroData, dataSize, bufferSize);

    EmitCs([this,
      cBufferSlice  = std::move(upSlice.slice),
      cPrimType     = PrimitiveType,
      cPrimCount    = PrimitiveCount,
      cInstanceCount = GetInstanceCount(),
      cStride       = VertexStreamZeroStride
    ](DxvkContext* ctx) {
      auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount);

      ApplyPrimitiveType(ctx, cPrimType);

      ctx->bindVertexBuffer(0, cBufferSlice, cStride);
      ctx->draw(
        drawInfo.vertexCount, drawInfo.instanceCount,
        0, 0);
      ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
    });

    m_state.vertexBuffers[0].vertexBuffer = nullptr;
    m_state.vertexBuffers[0].offset       = 0;
    m_state.vertexBuffers[0].stride       = 0;

    return D3D_OK;
  }


  // Draws indexed primitives from vertex and index data supplied directly
  // by the caller. Both are copied into one temporary buffer: the vertex
  // data first, followed by the indices at offset vertexBufferSize.
  //
  // Requires a vertex declaration to be bound; a zero primitive count
  // succeeds without drawing. On return the tracked stream 0 binding and
  // the tracked index buffer are reset.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawIndexedPrimitiveUP(
          D3DPRIMITIVETYPE PrimitiveType,
          UINT             MinVertexIndex,
          UINT             NumVertices,
          UINT             PrimitiveCount,
    const void*            pIndexData,
          D3DFORMAT        IndexDataFormat,
    const void*            pVertexStreamZeroData,
          UINT             VertexStreamZeroStride) {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(m_state.vertexDecl == nullptr))
      return D3DERR_INVALIDCALL;

    if (unlikely(!PrimitiveCount))
      return S_OK;

    PrepareDraw(PrimitiveType);

    auto drawInfo = GenerateDrawInfo(PrimitiveType, PrimitiveCount, 0);

    // The vertex upload covers vertices [0, MinVertexIndex + NumVertices).
    const uint32_t vertexDataSize = GetUPDataSize(MinVertexIndex + NumVertices, VertexStreamZeroStride);
    const uint32_t vertexBufferSize = GetUPBufferSize(MinVertexIndex + NumVertices, VertexStreamZeroStride);

    // Any format other than D3DFMT_INDEX16 is treated as 4-byte indices.
    const uint32_t indexSize = IndexDataFormat == D3DFMT_INDEX16 ? 2 : 4;
    const uint32_t indicesSize = drawInfo.vertexCount * indexSize;

    const uint32_t upSize = vertexBufferSize + indicesSize;

    auto upSlice = AllocTempBuffer(upSize);
    // NOTE(review): the target types of the casts in this function appear to
    // have been lost in this copy of the file; confirm against upstream.
    uint8_t* data = reinterpret_cast(upSlice.mapPtr);
    FillUPVertexBuffer(data, pVertexStreamZeroData, vertexDataSize, vertexBufferSize);
    std::memcpy(data + vertexBufferSize, pIndexData, indicesSize);

    EmitCs([this,
      cVertexSize   = vertexBufferSize,
      cBufferSlice  = std::move(upSlice.slice),
      cPrimType     = PrimitiveType,
      cPrimCount    = PrimitiveCount,
      cStride       = VertexStreamZeroStride,
      cInstanceCount = GetInstanceCount(),
      cIndexType    = DecodeIndexType(
                        static_cast(IndexDataFormat))
    ](DxvkContext* ctx) {
      auto drawInfo = GenerateDrawInfo(cPrimType, cPrimCount, cInstanceCount);

      ApplyPrimitiveType(ctx, cPrimType);

      // Split the shared upload into its vertex and index sub-ranges.
      ctx->bindVertexBuffer(0, cBufferSlice.subSlice(0, cVertexSize), cStride);
      ctx->bindIndexBuffer(cBufferSlice.subSlice(cVertexSize, cBufferSlice.length() - cVertexSize), cIndexType);
      ctx->drawIndexed(
        drawInfo.vertexCount, drawInfo.instanceCount,
        0,
        0, 0);
      ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
      ctx->bindIndexBuffer(DxvkBufferSlice(), VK_INDEX_TYPE_UINT32);
    });

    m_state.vertexBuffers[0].vertexBuffer = nullptr;
    m_state.vertexBuffers[0].offset       = 0;
    m_state.vertexBuffers[0].stride       = 0;
m_state.indices = nullptr;

    return D3D_OK;
  }


  // Runs VertexCount vertices, starting at SrcStartIndex, through the SWVP
  // emulation shader and writes the results into pDestBuffer starting at
  // vertex DestIndex.
  //
  // Returns D3DERR_INVALIDCALL when pDestBuffer or pVertexDecl is null.
  // When SupportsSWVP() is false the call logs an error once and returns
  // D3D_OK without processing anything.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ProcessVertices(
          UINT                         SrcStartIndex,
          UINT                         DestIndex,
          UINT                         VertexCount,
          IDirect3DVertexBuffer9*      pDestBuffer,
          IDirect3DVertexDeclaration9* pVertexDecl,
          DWORD                        Flags) {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(pDestBuffer == nullptr || pVertexDecl == nullptr))
      return D3DERR_INVALIDCALL;

    if (!SupportsSWVP()) {
      static bool s_errorShown = false;

      if (!std::exchange(s_errorShown, true))
        Logger::err("D3D9DeviceEx::ProcessVertices: SWVP emu unsupported (vertexPipelineStoresAndAtomics)");

      return D3D_OK;
    }

    // NOTE(review): the target types of these casts (and the template
    // argument of Rc below) appear to have been lost in this copy of the
    // file; confirm against the upstream source.
    D3D9CommonBuffer* dst  = static_cast(pDestBuffer)->GetCommonBuffer();
    D3D9VertexDecl*   decl = static_cast (pVertexDecl);

    PrepareDraw(D3DPT_FORCE_DWORD);

    // Fall back to a declaration built from the destination buffer's FVF,
    // cached in m_fvfTable.
    // NOTE(review): a null pVertexDecl was already rejected above, so this
    // branch looks unreachable unless the cast can yield null; confirm.
    if (decl == nullptr) {
      DWORD FVF = dst->Desc()->FVF;

      auto iter = m_fvfTable.find(FVF);

      if (iter == m_fvfTable.end()) {
        decl = new D3D9VertexDecl(this, FVF);
        m_fvfTable.insert(std::make_pair(FVF, decl));
      }
      else
        decl = iter->second.ptr();
    }

    // Byte offset of the first destination vertex; the output slice covers
    // the rest of the buffer from there.
    uint32_t offset = DestIndex * decl->GetSize();

    auto slice = dst->GetBufferSlice();
         slice = slice.subSlice(offset, slice.length() - offset);

    EmitCs([this,
      cDecl          = ref(decl),
      cVertexCount   = VertexCount,
      cStartIndex    = SrcStartIndex,
      cInstanceCount = GetInstanceCount(),
      cBufferSlice   = slice,
      cIndexed       = m_state.indices != nullptr
    ](DxvkContext* ctx) {
      Rc shader = m_swvpEmulator.GetShaderModule(this, cDecl);

      auto drawInfo = GenerateDrawInfo(D3DPT_POINTLIST, cVertexCount, cInstanceCount);

      if (drawInfo.instanceCount != 1) {
        drawInfo.instanceCount = 1;

        Logger::warn("D3D9DeviceEx::ProcessVertices: instancing unsupported");
      }

      ApplyPrimitiveType(ctx, D3DPT_POINTLIST);

      // Unbind the pixel shader, we aren't drawing
      // to avoid val errors / UB.
ctx->bindShader(VK_SHADER_STAGE_FRAGMENT_BIT, nullptr); ctx->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT, shader); ctx->bindResourceBuffer(getSWVPBufferSlot(), cBufferSlice); ctx->draw( drawInfo.vertexCount, drawInfo.instanceCount, cStartIndex, 0); ctx->bindResourceBuffer(getSWVPBufferSlot(), DxvkBufferSlice()); ctx->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT, nullptr); }); // We unbound the pixel shader before, // let's make sure that gets rebound. m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); if (m_state.pixelShader != nullptr) { BindShader( GetCommonShader(m_state.pixelShader), GetPixelShaderPermutation()); } if (dst->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) { uint32_t copySize = VertexCount * decl->GetSize(); EmitCs([ cSrcBuffer = dst->GetBuffer(), cDstBuffer = dst->GetBuffer(), cOffset = offset, cCopySize = copySize ](DxvkContext* ctx) { ctx->copyBuffer(cDstBuffer, cOffset, cSrcBuffer, cOffset, cCopySize); }); } dst->SetWrittenByGPU(true); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexDeclaration( const D3DVERTEXELEMENT9* pVertexElements, IDirect3DVertexDeclaration9** ppDecl) { InitReturnPtr(ppDecl); if (unlikely(ppDecl == nullptr || pVertexElements == nullptr)) return D3DERR_INVALIDCALL; const D3DVERTEXELEMENT9* counter = pVertexElements; while (counter->Stream != 0xFF) counter++; const uint32_t declCount = uint32_t(counter - pVertexElements); try { const Com decl = new D3D9VertexDecl(this, pVertexElements, declCount); *ppDecl = decl.ref(); return D3D_OK; } catch (const DxvkError & e) { Logger::err(e.message()); return D3DERR_INVALIDCALL; } } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexDeclaration(IDirect3DVertexDeclaration9* pDecl) { D3D9DeviceLock lock = LockDevice(); D3D9VertexDecl* decl = static_cast(pDecl); if (unlikely(ShouldRecord())) return m_recorder->SetVertexDeclaration(decl); if (decl == m_state.vertexDecl.ptr()) return D3D_OK; bool dirtyFFShader = decl == nullptr || m_state.vertexDecl == nullptr; if 
(!dirtyFFShader) dirtyFFShader |= decl->TestFlag(D3D9VertexDeclFlag::HasPositionT) != m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT) || decl->TestFlag(D3D9VertexDeclFlag::HasColor0) != m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor0) || decl->TestFlag(D3D9VertexDeclFlag::HasColor1) != m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor1) || decl->GetTexcoordMask() != m_state.vertexDecl->GetTexcoordMask(); if (dirtyFFShader) m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); m_state.vertexDecl = decl; m_flags.set(D3D9DeviceFlag::DirtyInputLayout); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexDeclaration(IDirect3DVertexDeclaration9** ppDecl) { D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppDecl); if (ppDecl == nullptr) return D3D_OK; if (m_state.vertexDecl == nullptr) return D3D_OK; *ppDecl = m_state.vertexDecl.ref(); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetFVF(DWORD FVF) { D3D9DeviceLock lock = LockDevice(); if (FVF == 0) return D3D_OK; D3D9VertexDecl* decl = nullptr; auto iter = m_fvfTable.find(FVF); if (iter == m_fvfTable.end()) { decl = new D3D9VertexDecl(this, FVF); m_fvfTable.insert(std::make_pair(FVF, decl)); } else decl = iter->second.ptr(); return this->SetVertexDeclaration(decl); } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFVF(DWORD* pFVF) { D3D9DeviceLock lock = LockDevice(); if (pFVF == nullptr) return D3DERR_INVALIDCALL; *pFVF = m_state.vertexDecl != nullptr ? 
m_state.vertexDecl->GetFVF() : 0;

    return D3D_OK;
  }


  // Compiles the given shader bytecode into a vertex shader object.
  // Returns D3DERR_INVALIDCALL if ppShader is null or CreateShaderModule
  // fails; on success *ppShader receives a new reference.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexShader(
    const DWORD*                   pFunction,
          IDirect3DVertexShader9** ppShader) {
    // CreateVertexShader does not init the
    // return ptr unlike CreatePixelShader

    if (unlikely(ppShader == nullptr))
      return D3DERR_INVALIDCALL;

    DxsoModuleInfo moduleInfo;
    moduleInfo.options = m_dxsoOptions;

    D3D9CommonShader module;

    if (FAILED(this->CreateShaderModule(&module,
      VK_SHADER_STAGE_VERTEX_BIT,
      pFunction,
      &moduleInfo)))
      return D3DERR_INVALIDCALL;

    *ppShader = ref(new D3D9VertexShader(this, module));

    return D3D_OK;
  }


  // Binds a vertex shader (or null for fixed function), or forwards the
  // call to the state block recorder while recording. Also decides whether
  // the vertex shader constants have to be re-uploaded.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShader(IDirect3DVertexShader9* pShader) {
    D3D9DeviceLock lock = LockDevice();

    D3D9VertexShader* shader = static_cast<D3D9VertexShader*>(pShader);

    if (unlikely(ShouldRecord()))
      return m_recorder->SetVertexShader(shader);

    if (shader == m_state.vertexShader.ptr())
      return D3D_OK;

    auto* oldShader = GetCommonShader(m_state.vertexShader);
    auto* newShader = GetCommonShader(shader);

    bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies;
    bool newCopies = newShader && newShader->GetMeta().needsConstantCopies;

    m_consts[DxsoProgramTypes::VertexShader].dirty |= oldCopies || newCopies || !oldShader;
    m_consts[DxsoProgramTypes::VertexShader].meta  = newShader ?
newShader->GetMeta() : DxsoShaderMetaInfo(); if (newShader && oldShader) { m_consts[DxsoProgramTypes::VertexShader].dirty |= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF || newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB; } m_state.vertexShader = shader; if (shader != nullptr) { m_flags.clr(D3D9DeviceFlag::DirtyProgVertexShader); m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); BindShader( GetCommonShader(shader), GetVertexShaderPermutation()); m_vsShaderMasks = newShader->GetShaderMask(); } else m_vsShaderMasks = D3D9ShaderMasks(); m_flags.set(D3D9DeviceFlag::DirtyInputLayout); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShader(IDirect3DVertexShader9** ppShader) { D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppShader); if (unlikely(ppShader == nullptr)) return D3DERR_INVALIDCALL; *ppShader = m_state.vertexShader.ref(); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantF( UINT StartRegister, const float* pConstantData, UINT Vector4fCount) { D3D9DeviceLock lock = LockDevice(); return SetShaderConstants< DxsoProgramTypes::VertexShader, D3D9ConstantType::Float>( StartRegister, pConstantData, Vector4fCount); } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantF( UINT StartRegister, float* pConstantData, UINT Vector4fCount) { D3D9DeviceLock lock = LockDevice(); return GetShaderConstants< DxsoProgramTypes::VertexShader, D3D9ConstantType::Float>( StartRegister, pConstantData, Vector4fCount); } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantI( UINT StartRegister, const int* pConstantData, UINT Vector4iCount) { D3D9DeviceLock lock = LockDevice(); return SetShaderConstants< DxsoProgramTypes::VertexShader, D3D9ConstantType::Int>( StartRegister, pConstantData, Vector4iCount); } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantI( UINT StartRegister, 
int* pConstantData,
    UINT Vector4iCount) {
    D3D9DeviceLock lock = LockDevice();

    return GetShaderConstants<
      DxsoProgramTypes::VertexShader,
      D3D9ConstantType::Int>(
        StartRegister,
        pConstantData,
        Vector4iCount);
  }


  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantB(
          UINT  StartRegister,
    const BOOL* pConstantData,
          UINT  BoolCount) {
    D3D9DeviceLock lock = LockDevice();

    return SetShaderConstants<
      DxsoProgramTypes::VertexShader,
      D3D9ConstantType::Bool>(
        StartRegister,
        pConstantData,
        BoolCount);
  }


  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantB(
    UINT  StartRegister,
    BOOL* pConstantData,
    UINT  BoolCount) {
    D3D9DeviceLock lock = LockDevice();

    return GetShaderConstants<
      DxsoProgramTypes::VertexShader,
      D3D9ConstantType::Bool>(
        StartRegister,
        pConstantData,
        BoolCount);
  }


  // Binds a vertex buffer to a stream, or forwards the call to the state
  // block recorder while recording.
  //
  // Returns D3DERR_INVALIDCALL if StreamNumber >= caps::MaxStreams.
  // When pStreamData is null the stored offset and stride are left
  // untouched. BindVertexBuffer is only called if the buffer, or (for a
  // non-null buffer) the offset or stride, actually changed.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetStreamSource(
          UINT                    StreamNumber,
          IDirect3DVertexBuffer9* pStreamData,
          UINT                    OffsetInBytes,
          UINT                    Stride) {
    D3D9DeviceLock lock = LockDevice();

    if (unlikely(StreamNumber >= caps::MaxStreams))
      return D3DERR_INVALIDCALL;

    D3D9VertexBuffer* buffer = static_cast<D3D9VertexBuffer*>(pStreamData);

    if (unlikely(ShouldRecord()))
      return m_recorder->SetStreamSource(
        StreamNumber,
        buffer,
        OffsetInBytes,
        Stride);

    auto& vbo = m_state.vertexBuffers[StreamNumber];
    bool needsUpdate = vbo.vertexBuffer != buffer;

    if (needsUpdate)
      vbo.vertexBuffer = buffer;

    if (buffer != nullptr) {
      needsUpdate |= vbo.offset != OffsetInBytes
                  || vbo.stride != Stride;

      vbo.offset = OffsetInBytes;
      vbo.stride = Stride;
    }

    if (needsUpdate)
      BindVertexBuffer(StreamNumber, buffer, OffsetInBytes, Stride);

    return D3D_OK;
  }


  // Reports the buffer, offset and stride bound to a stream.
  // The non-null out parameters are zero-initialised before validation, so
  // they hold defined values even when D3DERR_INVALIDCALL is returned.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetStreamSource(
          UINT                     StreamNumber,
          IDirect3DVertexBuffer9** ppStreamData,
          UINT*                    pOffsetInBytes,
          UINT*                    pStride) {
    D3D9DeviceLock lock = LockDevice();

    InitReturnPtr(ppStreamData);

    if (likely(pOffsetInBytes != nullptr))
      *pOffsetInBytes = 0;

    if (likely(pStride != nullptr))
      *pStride = 0;

    if (unlikely(ppStreamData == nullptr || pOffsetInBytes == nullptr || pStride
== nullptr)) return D3DERR_INVALIDCALL; if (unlikely(StreamNumber >= caps::MaxStreams)) return D3DERR_INVALIDCALL; const auto& vbo = m_state.vertexBuffers[StreamNumber]; *ppStreamData = vbo.vertexBuffer.ref(); *pOffsetInBytes = vbo.offset; *pStride = vbo.stride; return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetStreamSourceFreq(UINT StreamNumber, UINT Setting) { D3D9DeviceLock lock = LockDevice(); if (unlikely(StreamNumber >= caps::MaxStreams)) return D3DERR_INVALIDCALL; const bool indexed = Setting & D3DSTREAMSOURCE_INDEXEDDATA; const bool instanced = Setting & D3DSTREAMSOURCE_INSTANCEDATA; if (unlikely(StreamNumber == 0 && instanced)) return D3DERR_INVALIDCALL; if (unlikely(instanced && indexed)) return D3DERR_INVALIDCALL; if (unlikely(Setting == 0)) return D3DERR_INVALIDCALL; if (unlikely(ShouldRecord())) return m_recorder->SetStreamSourceFreq(StreamNumber, Setting); if (m_state.streamFreq[StreamNumber] == Setting) return D3D_OK; m_state.streamFreq[StreamNumber] = Setting; if (instanced) m_instancedData |= 1u << StreamNumber; else m_instancedData &= ~(1u << StreamNumber); m_flags.set(D3D9DeviceFlag::DirtyInputLayout); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetStreamSourceFreq(UINT StreamNumber, UINT* pSetting) { D3D9DeviceLock lock = LockDevice(); if (unlikely(StreamNumber >= caps::MaxStreams)) return D3DERR_INVALIDCALL; if (unlikely(pSetting == nullptr)) return D3DERR_INVALIDCALL; *pSetting = m_state.streamFreq[StreamNumber]; return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetIndices(IDirect3DIndexBuffer9* pIndexData) { D3D9DeviceLock lock = LockDevice(); D3D9IndexBuffer* buffer = static_cast(pIndexData); if (unlikely(ShouldRecord())) return m_recorder->SetIndices(buffer); if (buffer == m_state.indices.ptr()) return D3D_OK; m_state.indices = buffer; BindIndices(); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetIndices(IDirect3DIndexBuffer9** ppIndexData) { D3D9DeviceLock lock = LockDevice(); 
InitReturnPtr(ppIndexData); if (unlikely(ppIndexData == nullptr)) return D3DERR_INVALIDCALL; *ppIndexData = m_state.indices.ref(); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreatePixelShader( const DWORD* pFunction, IDirect3DPixelShader9** ppShader) { InitReturnPtr(ppShader); if (unlikely(ppShader == nullptr)) return D3DERR_INVALIDCALL; DxsoModuleInfo moduleInfo; moduleInfo.options = m_dxsoOptions; D3D9CommonShader module; if (FAILED(this->CreateShaderModule(&module, VK_SHADER_STAGE_FRAGMENT_BIT, pFunction, &moduleInfo))) return D3DERR_INVALIDCALL; *ppShader = ref(new D3D9PixelShader(this, module)); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShader(IDirect3DPixelShader9* pShader) { D3D9DeviceLock lock = LockDevice(); D3D9PixelShader* shader = static_cast(pShader); if (unlikely(ShouldRecord())) return m_recorder->SetPixelShader(shader); if (shader == m_state.pixelShader.ptr()) return D3D_OK; auto* oldShader = GetCommonShader(m_state.pixelShader); auto* newShader = GetCommonShader(shader); bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies; bool newCopies = newShader && newShader->GetMeta().needsConstantCopies; m_consts[DxsoProgramTypes::PixelShader].dirty |= oldCopies || newCopies || !oldShader; m_consts[DxsoProgramTypes::PixelShader].meta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo(); if (newShader && oldShader) { m_consts[DxsoProgramTypes::PixelShader].dirty |= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF || newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB; } m_state.pixelShader = shader; if (shader != nullptr) { m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); BindShader( GetCommonShader(shader), GetPixelShaderPermutation()); m_psShaderMasks = newShader->GetShaderMask(); } else { // TODO: What fixed function textures are in use? 
// Currently we are making all 8 of them as in use here. // The RT output is always 0 for fixed function. m_psShaderMasks = FixedFunctionMask; } UpdateActiveHazardsRT(UINT32_MAX); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShader(IDirect3DPixelShader9** ppShader) { D3D9DeviceLock lock = LockDevice(); InitReturnPtr(ppShader); if (unlikely(ppShader == nullptr)) return D3DERR_INVALIDCALL; *ppShader = m_state.pixelShader.ref(); return D3D_OK; } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantF( UINT StartRegister, const float* pConstantData, UINT Vector4fCount) { D3D9DeviceLock lock = LockDevice(); return SetShaderConstants < DxsoProgramTypes::PixelShader, D3D9ConstantType::Float>( StartRegister, pConstantData, Vector4fCount); } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantF( UINT StartRegister, float* pConstantData, UINT Vector4fCount) { D3D9DeviceLock lock = LockDevice(); return GetShaderConstants< DxsoProgramTypes::PixelShader, D3D9ConstantType::Float>( StartRegister, pConstantData, Vector4fCount); } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantI( UINT StartRegister, const int* pConstantData, UINT Vector4iCount) { D3D9DeviceLock lock = LockDevice(); return SetShaderConstants< DxsoProgramTypes::PixelShader, D3D9ConstantType::Int>( StartRegister, pConstantData, Vector4iCount); } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantI( UINT StartRegister, int* pConstantData, UINT Vector4iCount) { D3D9DeviceLock lock = LockDevice(); return GetShaderConstants< DxsoProgramTypes::PixelShader, D3D9ConstantType::Int>( StartRegister, pConstantData, Vector4iCount); } HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantB( UINT StartRegister, const BOOL* pConstantData, UINT BoolCount) { D3D9DeviceLock lock = LockDevice(); return SetShaderConstants< DxsoProgramTypes::PixelShader, D3D9ConstantType::Bool>( StartRegister, pConstantData, BoolCount); } HRESULT STDMETHODCALLTYPE 
D3D9DeviceEx::GetPixelShaderConstantB(
    UINT  StartRegister,
    BOOL* pConstantData,
    UINT  BoolCount) {
    D3D9DeviceLock lock = LockDevice();

    return GetShaderConstants<
      DxsoProgramTypes::PixelShader,
      D3D9ConstantType::Bool>(
        StartRegister,
        pConstantData,
        BoolCount);
  }


  // Patch drawing is not implemented: the three entry points below log a
  // warning the first time they are called and always fail with
  // D3DERR_INVALIDCALL.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawRectPatch(
          UINT               Handle,
    const float*             pNumSegs,
    const D3DRECTPATCH_INFO* pRectPatchInfo) {
    static bool s_errorShown = false;

    const bool alreadyShown = std::exchange(s_errorShown, true);

    if (!alreadyShown)
      Logger::warn("D3D9DeviceEx::DrawRectPatch: Stub");

    return D3DERR_INVALIDCALL;
  }


  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawTriPatch(
          UINT              Handle,
    const float*            pNumSegs,
    const D3DTRIPATCH_INFO* pTriPatchInfo) {
    static bool s_errorShown = false;

    const bool alreadyShown = std::exchange(s_errorShown, true);

    if (!alreadyShown)
      Logger::warn("D3D9DeviceEx::DrawTriPatch: Stub");

    return D3DERR_INVALIDCALL;
  }


  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DeletePatch(UINT Handle) {
    static bool s_errorShown = false;

    const bool alreadyShown = std::exchange(s_errorShown, true);

    if (!alreadyShown)
      Logger::warn("D3D9DeviceEx::DeletePatch: Stub");

    return D3DERR_INVALIDCALL;
  }


  // Creates a query object of the requested type.
  // With a null ppQuery the call only reports whether the type is
  // supported (the result of D3D9Query::QuerySupported). A DxvkError thrown
  // during construction is logged and mapped to D3DERR_INVALIDCALL.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateQuery(D3DQUERYTYPE Type, IDirect3DQuery9** ppQuery) {
    const HRESULT support = D3D9Query::QuerySupported(this, Type);

    const bool probeOnly = ppQuery == nullptr;

    if (probeOnly || support != D3D_OK)
      return support;

    try {
      D3D9Query* query = new D3D9Query(this, Type);
      *ppQuery = ref(query);
      return D3D_OK;
    }
    catch (const DxvkError & e) {
      Logger::err(e.message());
      return D3DERR_INVALIDCALL;
    }
  }


  // Ex Methods

  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetConvolutionMonoKernel(
    UINT   width,
    UINT   height,
    float* rows,
    float* columns) {
    // We don't advertise support for this.
return D3DERR_INVALIDCALL;
  }


  // Not implemented: logs a warning and reports success.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ComposeRects(
    IDirect3DSurface9*      pSrc,
    IDirect3DSurface9*      pDst,
    IDirect3DVertexBuffer9* pSrcRectDescs,
    UINT                    NumRects,
    IDirect3DVertexBuffer9* pDstRectDescs,
    D3DCOMPOSERECTSOP       Operation,
    int                     Xoffset,
    int                     Yoffset) {
    Logger::warn("D3D9DeviceEx::ComposeRects: Stub");

    return D3D_OK;
  }


  // Not implemented: logs a warning and reports success. pPriority is not
  // written.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetGPUThreadPriority(INT* pPriority) {
    Logger::warn("D3D9DeviceEx::GetGPUThreadPriority: Stub");

    return D3D_OK;
  }


  // Not implemented: logs a warning and reports success.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetGPUThreadPriority(INT Priority) {
    Logger::warn("D3D9DeviceEx::SetGPUThreadPriority: Stub");

    return D3D_OK;
  }


  // Forwards to the implicit swapchain; only swapchain index 0 is accepted.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::WaitForVBlank(UINT iSwapChain) {
    if (iSwapChain == 0)
      return m_implicitSwapchain->WaitForVBlank();

    return D3DERR_INVALIDCALL;
  }


  // Not implemented: logs a warning and reports success.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CheckResourceResidency(IDirect3DResource9** pResourceArray, UINT32 NumResources) {
    Logger::warn("D3D9DeviceEx::CheckResourceResidency: Stub");

    return D3D_OK;
  }


  // Stores the requested frame latency and lets the implicit swapchain
  // pick it up. Zero selects DefaultFrameLatency; values above
  // MaxFrameLatency are clamped to it.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetMaximumFrameLatency(UINT MaxLatency) {
    D3D9DeviceLock deviceLock = LockDevice();

    UINT latency = MaxLatency;

    if (latency == 0)
      latency = DefaultFrameLatency;

    if (latency > MaxFrameLatency)
      latency = MaxFrameLatency;

    m_frameLatency = latency;

    m_implicitSwapchain->SyncFrameLatency();

    return D3D_OK;
  }


  // Reports the stored frame latency; fails with D3DERR_INVALIDCALL when
  // pMaxLatency is null.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetMaximumFrameLatency(UINT* pMaxLatency) {
    D3D9DeviceLock deviceLock = LockDevice();

    if (pMaxLatency != nullptr) {
      *pMaxLatency = m_frameLatency;
      return D3D_OK;
    }

    return D3DERR_INVALIDCALL;
  }


  // Always reports D3D_OK; the window handle is not inspected.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CheckDeviceState(HWND hDestinationWindow) {
    return D3D_OK;
  }


  // Presents through the implicit swapchain with all arguments passed on
  // unchanged.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::PresentEx(
    const RECT*    pSourceRect,
    const RECT*    pDestRect,
          HWND     hDestWindowOverride,
    const RGNDATA* pDirtyRegion,
          DWORD    dwFlags) {
    return m_implicitSwapchain->Present(
      pSourceRect, pDestRect,
      hDestWindowOverride, pDirtyRegion,
      dwFlags);
  }


  HRESULT STDMETHODCALLTYPE
D3D9DeviceEx::CreateRenderTargetEx(
          UINT                Width,
          UINT                Height,
          D3DFORMAT           Format,
          D3DMULTISAMPLE_TYPE MultiSample,
          DWORD               MultisampleQuality,
          BOOL                Lockable,
          IDirect3DSurface9** ppSurface,
          HANDLE*             pSharedHandle,
          DWORD               Usage) {
    // Creates a single-level render target surface in D3DPOOL_DEFAULT.
    // D3DUSAGE_RENDERTARGET is always added to Usage. Lockable and
    // pSharedHandle are not read here.
    // Returns D3DERR_INVALIDCALL for a null ppSurface or when the
    // description cannot be normalized, and D3DERR_OUTOFVIDEOMEMORY when
    // surface creation throws DxvkError.
    InitReturnPtr(ppSurface);

    if (unlikely(ppSurface == nullptr))
      return D3DERR_INVALIDCALL;

    D3D9_COMMON_TEXTURE_DESC desc;
    desc.Width              = Width;
    desc.Height             = Height;
    desc.Depth              = 1;
    desc.ArraySize          = 1;
    desc.MipLevels          = 1;
    desc.Usage              = Usage | D3DUSAGE_RENDERTARGET;
    desc.Format             = EnumerateFormat(Format);
    desc.Pool               = D3DPOOL_DEFAULT;
    desc.Discard            = FALSE;
    desc.MultiSample        = MultiSample;
    desc.MultisampleQuality = MultisampleQuality;
    desc.IsBackBuffer       = FALSE;
    desc.IsAttachmentOnly   = TRUE;

    if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
      return D3DERR_INVALIDCALL;

    try {
      const Com<D3D9Surface> surface = new D3D9Surface(this, &desc, nullptr);
      m_initializer->InitTexture(surface->GetCommonTexture());
      *ppSurface = surface.ref();
      return D3D_OK;
    }
    catch (const DxvkError& e) {
      Logger::err(e.message());
      return D3DERR_OUTOFVIDEOMEMORY;
    }
  }


  // Creates a single-level, non-multisampled plain surface in the requested
  // pool. The surface is flagged attachment-only exactly when Pool is
  // D3DPOOL_DEFAULT. pSharedHandle is not read here.
  // Returns D3DERR_INVALIDCALL for a null ppSurface or when the description
  // cannot be normalized, and D3DERR_OUTOFVIDEOMEMORY when surface creation
  // throws DxvkError.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateOffscreenPlainSurfaceEx(
          UINT                Width,
          UINT                Height,
          D3DFORMAT           Format,
          D3DPOOL             Pool,
          IDirect3DSurface9** ppSurface,
          HANDLE*             pSharedHandle,
          DWORD               Usage) {
    InitReturnPtr(ppSurface);

    if (unlikely(ppSurface == nullptr))
      return D3DERR_INVALIDCALL;

    D3D9_COMMON_TEXTURE_DESC desc;
    desc.Width              = Width;
    desc.Height             = Height;
    desc.Depth              = 1;
    desc.ArraySize          = 1;
    desc.MipLevels          = 1;
    desc.Usage              = Usage;
    desc.Format             = EnumerateFormat(Format);
    desc.Pool               = Pool;
    desc.Discard            = FALSE;
    desc.MultiSample        = D3DMULTISAMPLE_NONE;
    desc.MultisampleQuality = 0;
    desc.IsBackBuffer       = FALSE;
    desc.IsAttachmentOnly   = Pool == D3DPOOL_DEFAULT;

    if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
      return D3DERR_INVALIDCALL;

    try {
      const Com<D3D9Surface> surface = new D3D9Surface(this, &desc, nullptr);
      m_initializer->InitTexture(surface->GetCommonTexture());
      *ppSurface = surface.ref();
      return D3D_OK;
    }
    catch
(const DxvkError& e) {
      Logger::err(e.message());
      return D3DERR_OUTOFVIDEOMEMORY;
    }
  }


  // Creates a single-level depth-stencil surface in D3DPOOL_DEFAULT.
  // D3DUSAGE_DEPTHSTENCIL is always added to Usage. pSharedHandle is not
  // read here.
  // Returns D3DERR_INVALIDCALL for a null ppSurface or when the description
  // cannot be normalized, and D3DERR_OUTOFVIDEOMEMORY when surface creation
  // throws DxvkError.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateDepthStencilSurfaceEx(
          UINT                Width,
          UINT                Height,
          D3DFORMAT           Format,
          D3DMULTISAMPLE_TYPE MultiSample,
          DWORD               MultisampleQuality,
          BOOL                Discard,
          IDirect3DSurface9** ppSurface,
          HANDLE*             pSharedHandle,
          DWORD               Usage) {
    InitReturnPtr(ppSurface);

    if (unlikely(ppSurface == nullptr))
      return D3DERR_INVALIDCALL;

    D3D9_COMMON_TEXTURE_DESC desc;
    desc.Width              = Width;
    desc.Height             = Height;
    desc.Depth              = 1;
    desc.ArraySize          = 1;
    desc.MipLevels          = 1;
    desc.Usage              = Usage | D3DUSAGE_DEPTHSTENCIL;
    desc.Format             = EnumerateFormat(Format);
    desc.Pool               = D3DPOOL_DEFAULT;
    desc.Discard            = Discard;
    desc.MultiSample        = MultiSample;
    desc.MultisampleQuality = MultisampleQuality;
    desc.IsBackBuffer       = FALSE;
    desc.IsAttachmentOnly   = TRUE;

    if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
      return D3DERR_INVALIDCALL;

    try {
      const Com<D3D9Surface> surface = new D3D9Surface(this, &desc, nullptr);
      m_initializer->InitTexture(surface->GetCommonTexture());
      *ppSurface = surface.ref();
      return D3D_OK;
    }
    catch (const DxvkError& e) {
      Logger::err(e.message());
      return D3DERR_OUTOFVIDEOMEMORY;
    }
  }


  // Resets the swapchain with the given parameters. A failing HRESULT from
  // ResetSwapChain is returned as-is; any success code is reported as
  // D3D_OK.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ResetEx(
          D3DPRESENT_PARAMETERS* pPresentationParameters,
          D3DDISPLAYMODEEX*      pFullscreenDisplayMode) {
    D3D9DeviceLock lock = LockDevice();

    HRESULT hr = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode);
    if (FAILED(hr))
      return hr;

    return D3D_OK;
  }


  // Forwards to the implicit swapchain; only swapchain index 0 is accepted.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDisplayModeEx(
          UINT                iSwapChain,
          D3DDISPLAYMODEEX*   pMode,
          D3DDISPLAYROTATION* pRotation) {
    if (unlikely(iSwapChain != 0))
      return D3DERR_INVALIDCALL;

    return m_implicitSwapchain->GetDisplayModeEx(pMode, pRotation);
  }


  // Creates an additional windowed swapchain.
  // Fails with D3DERR_INVALIDCALL for null arguments, for a fullscreen
  // request, or while the implicit swapchain is fullscreen, and with
  // D3DERR_NOTAVAILABLE when swapchain construction throws DxvkError.
  HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateAdditionalSwapChainEx(
          D3DPRESENT_PARAMETERS* pPresentationParameters,
    const D3DDISPLAYMODEEX*      pFullscreenDisplayMode,
          IDirect3DSwapChain9**  ppSwapChain) {
    D3D9DeviceLock lock = LockDevice();
InitReturnPtr(ppSwapChain); if (ppSwapChain == nullptr || pPresentationParameters == nullptr) return D3DERR_INVALIDCALL; // Additional fullscreen swapchains are forbidden. if (!pPresentationParameters->Windowed) return D3DERR_INVALIDCALL; // We can't make another swapchain if we are fullscreen. if (!m_implicitSwapchain->GetPresentParams()->Windowed) return D3DERR_INVALIDCALL; m_implicitSwapchain->Invalidate(pPresentationParameters->hDeviceWindow); try { auto* swapchain = new D3D9SwapChainEx(this, pPresentationParameters, pFullscreenDisplayMode); *ppSwapChain = ref(swapchain); } catch (const DxvkError & e) { Logger::err(e.message()); return D3DERR_NOTAVAILABLE; } return D3D_OK; } HRESULT D3D9DeviceEx::SetStateSamplerState( DWORD StateSampler, D3DSAMPLERSTATETYPE Type, DWORD Value) { D3D9DeviceLock lock = LockDevice(); if (unlikely(ShouldRecord())) return m_recorder->SetStateSamplerState(StateSampler, Type, Value); auto& state = m_state.samplerStates; if (state[StateSampler][Type] == Value) return D3D_OK; state[StateSampler][Type] = Value; if (Type == D3DSAMP_ADDRESSU || Type == D3DSAMP_ADDRESSV || Type == D3DSAMP_ADDRESSW || Type == D3DSAMP_MAGFILTER || Type == D3DSAMP_MINFILTER || Type == D3DSAMP_MIPFILTER || Type == D3DSAMP_MAXANISOTROPY || Type == D3DSAMP_MIPMAPLODBIAS || Type == D3DSAMP_MAXMIPLEVEL || Type == D3DSAMP_BORDERCOLOR) m_dirtySamplerStates |= 1u << StateSampler; else if (Type == D3DSAMP_SRGBTEXTURE && (m_activeTextures & (1u << StateSampler))) m_dirtyTextures |= 1u << StateSampler; constexpr DWORD Fetch4Enabled = MAKEFOURCC('G', 'E', 'T', '4'); constexpr DWORD Fetch4Disabled = MAKEFOURCC('G', 'E', 'T', '1'); if (unlikely(Type == D3DSAMP_MIPMAPLODBIAS)) { if (unlikely(Value == Fetch4Enabled)) { m_fetch4Enabled |= 1u << StateSampler; if (state[StateSampler][D3DSAMP_MAGFILTER] == D3DTEXF_POINT) m_fetch4 |= 1u << StateSampler; } else if (unlikely(Value == Fetch4Disabled)) { m_fetch4Enabled &= ~(1u << StateSampler); m_fetch4 &= ~(1u << StateSampler); } } 
if (unlikely(Type == D3DSAMP_MAGFILTER && (m_fetch4Enabled & (1u << StateSampler)))) { if (Value == D3DTEXF_POINT) m_fetch4 |= 1u << StateSampler; else m_fetch4 &= ~(1u << StateSampler); } return D3D_OK; } HRESULT D3D9DeviceEx::SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture) { D3D9DeviceLock lock = LockDevice(); if (unlikely(ShouldRecord())) return m_recorder->SetStateTexture(StateSampler, pTexture); if (m_state.textures[StateSampler] == pTexture) return D3D_OK; // We need to check our ops and disable respective stages. // Given we have transition from a null resource to // a valid resource or vice versa. if (StateSampler < 16 && (pTexture == nullptr || m_state.textures[StateSampler] == nullptr)) m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); auto oldTexture = GetCommonTexture(m_state.textures[StateSampler]); auto newTexture = GetCommonTexture(pTexture); DWORD oldUsage = oldTexture != nullptr ? oldTexture->Desc()->Usage : 0; DWORD newUsage = newTexture != nullptr ? 
newTexture->Desc()->Usage : 0; if (newTexture != nullptr) { const bool oldDepth = m_depthTextures & (1u << StateSampler); const bool newDepth = newTexture->IsShadow(); if (oldDepth != newDepth) { m_depthTextures &= ~(1u << StateSampler); if (newDepth) m_depthTextures |= 1u << StateSampler; m_dirtySamplerStates |= 1u << StateSampler; } } DWORD combinedUsage = oldUsage | newUsage; TextureChangePrivate(m_state.textures[StateSampler], pTexture); m_dirtyTextures |= 1u << StateSampler; UpdateActiveTextures(StateSampler, combinedUsage); return D3D_OK; } HRESULT D3D9DeviceEx::SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) { D3D9DeviceLock lock = LockDevice(); if (unlikely(ShouldRecord())) return m_recorder->SetStateTransform(idx, pMatrix); m_state.transforms[idx] = ConvertMatrix(pMatrix); m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); if (idx == GetTransformIndex(D3DTS_VIEW) || idx >= GetTransformIndex(D3DTS_WORLD)) m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend); return D3D_OK; } HRESULT D3D9DeviceEx::SetStateTextureStageState( DWORD Stage, D3D9TextureStageStateTypes Type, DWORD Value) { D3D9DeviceLock lock = LockDevice(); if (unlikely(Stage >= caps::TextureStageCount)) return D3DERR_INVALIDCALL; if (unlikely(Type >= TextureStageStateCount)) return D3DERR_INVALIDCALL; if (unlikely(ShouldRecord())) return m_recorder->SetStateTextureStageState(Stage, Type, Value); if (likely(m_state.textureStages[Stage][Type] != Value)) { m_state.textureStages[Stage][Type] = Value; switch (Type) { case DXVK_TSS_COLOROP: case DXVK_TSS_COLORARG0: case DXVK_TSS_COLORARG1: case DXVK_TSS_COLORARG2: case DXVK_TSS_ALPHAOP: case DXVK_TSS_ALPHAARG0: case DXVK_TSS_ALPHAARG1: case DXVK_TSS_ALPHAARG2: case DXVK_TSS_RESULTARG: m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); break; case DXVK_TSS_TEXCOORDINDEX: m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); break; case DXVK_TSS_TEXTURETRANSFORMFLAGS: m_projectionBitfield &= ~(1 << Stage); if (Value & D3DTTFF_PROJECTED) m_projectionBitfield 
|= 1 << Stage; m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); break; case DXVK_TSS_BUMPENVMAT00: case DXVK_TSS_BUMPENVMAT01: case DXVK_TSS_BUMPENVMAT10: case DXVK_TSS_BUMPENVMAT11: case DXVK_TSS_BUMPENVLSCALE: case DXVK_TSS_BUMPENVLOFFSET: case DXVK_TSS_CONSTANT: m_flags.set(D3D9DeviceFlag::DirtySharedPixelShaderData); break; default: break; } } return D3D_OK; } bool D3D9DeviceEx::IsExtended() { return m_parent->IsExtended(); } bool D3D9DeviceEx::SupportsSWVP() { return m_dxvkDevice->features().core.features.vertexPipelineStoresAndAtomics; } HWND D3D9DeviceEx::GetWindow() { return m_window; } DxvkDeviceFeatures D3D9DeviceEx::GetDeviceFeatures(const Rc& adapter) { DxvkDeviceFeatures supported = adapter->features(); DxvkDeviceFeatures enabled = {}; // Geometry shaders are used for some meta ops enabled.core.features.geometryShader = VK_TRUE; enabled.core.features.robustBufferAccess = VK_TRUE; enabled.extRobustness2.robustBufferAccess2 = supported.extRobustness2.robustBufferAccess2; enabled.extMemoryPriority.memoryPriority = supported.extMemoryPriority.memoryPriority; enabled.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation = supported.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation; enabled.extVertexAttributeDivisor.vertexAttributeInstanceRateDivisor = supported.extVertexAttributeDivisor.vertexAttributeInstanceRateDivisor; enabled.extVertexAttributeDivisor.vertexAttributeInstanceRateZeroDivisor = supported.extVertexAttributeDivisor.vertexAttributeInstanceRateZeroDivisor; // Null Descriptors enabled.extRobustness2.nullDescriptor = supported.extRobustness2.nullDescriptor; // ProcessVertices enabled.core.features.vertexPipelineStoresAndAtomics = supported.core.features.vertexPipelineStoresAndAtomics; // DXVK Meta enabled.core.features.shaderStorageImageWriteWithoutFormat = VK_TRUE; enabled.core.features.imageCubeArray = VK_TRUE; // SM1 level hardware enabled.core.features.depthClamp = 
VK_TRUE; enabled.core.features.depthBiasClamp = VK_TRUE; enabled.core.features.fillModeNonSolid = VK_TRUE; enabled.core.features.pipelineStatisticsQuery = supported.core.features.pipelineStatisticsQuery; enabled.core.features.sampleRateShading = VK_TRUE; enabled.core.features.samplerAnisotropy = supported.core.features.samplerAnisotropy; enabled.core.features.shaderClipDistance = VK_TRUE; enabled.core.features.shaderCullDistance = VK_TRUE; // Ensure we support real BC formats and unofficial vendor ones. enabled.core.features.textureCompressionBC = VK_TRUE; enabled.extDepthClipEnable.depthClipEnable = supported.extDepthClipEnable.depthClipEnable; enabled.extHostQueryReset.hostQueryReset = supported.extHostQueryReset.hostQueryReset; // SM2 level hardware enabled.core.features.occlusionQueryPrecise = VK_TRUE; // SM3 level hardware enabled.core.features.multiViewport = VK_TRUE; enabled.core.features.independentBlend = VK_TRUE; // D3D10 level hardware supports this in D3D9 native. enabled.core.features.fullDrawIndexUint32 = VK_TRUE; // Enable depth bounds test if we support it. enabled.core.features.depthBounds = supported.core.features.depthBounds; if (supported.extCustomBorderColor.customBorderColorWithoutFormat) { enabled.extCustomBorderColor.customBorderColors = VK_TRUE; enabled.extCustomBorderColor.customBorderColorWithoutFormat = VK_TRUE; } return enabled; } void D3D9DeviceEx::DetermineConstantLayouts(bool canSWVP) { m_vsLayout.floatCount = canSWVP ? uint32_t(m_d3d9Options.swvpFloatCount) : caps::MaxFloatConstantsVS; m_vsLayout.intCount = canSWVP ? uint32_t(m_d3d9Options.swvpIntCount) : caps::MaxOtherConstants; m_vsLayout.boolCount = canSWVP ? 
uint32_t(m_d3d9Options.swvpBoolCount) : caps::MaxOtherConstants; m_vsLayout.bitmaskCount = align(m_vsLayout.boolCount, 32) / 32; m_psLayout.floatCount = caps::MaxFloatConstantsPS; m_psLayout.intCount = caps::MaxOtherConstants; m_psLayout.boolCount = caps::MaxOtherConstants; m_psLayout.bitmaskCount = align(m_psLayout.boolCount, 32) / 32; } template D3D9BufferSlice D3D9DeviceEx::AllocTempBuffer(VkDeviceSize size) { constexpr VkDeviceSize DefaultSize = 1 << 20; VkMemoryPropertyFlags memoryFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; if constexpr (UpBuffer) { memoryFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; } D3D9BufferSlice& currentSlice = UpBuffer ? m_upBuffer : m_managedUploadBuffer; if (size <= DefaultSize) { if (unlikely(!currentSlice.slice.defined())) { DxvkBufferCreateInfo info; info.size = DefaultSize; if constexpr (UpBuffer) { info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; info.access = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; info.stages = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; } else { info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; info.stages = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; info.access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT; } currentSlice.slice = DxvkBufferSlice(m_dxvkDevice->createBuffer(info, memoryFlags)); currentSlice.mapPtr = currentSlice.slice.mapPtr(0); } else if (unlikely(currentSlice.slice.length() < size)) { auto physSlice = currentSlice.slice.buffer()->allocSlice(); currentSlice.slice = DxvkBufferSlice(currentSlice.slice.buffer()); currentSlice.mapPtr = physSlice.mapPtr; EmitCs([ cBuffer = currentSlice.slice.buffer(), cSlice = physSlice ] (DxvkContext* ctx) { ctx->invalidateBuffer(cBuffer, cSlice); }); } D3D9BufferSlice result; result.slice = currentSlice.slice.subSlice(0, size); result.mapPtr = reinterpret_cast(currentSlice.mapPtr) + 
currentSlice.slice.offset(); VkDeviceSize adjust = align(size, CACHE_LINE_SIZE); currentSlice.slice = currentSlice.slice.subSlice(adjust, currentSlice.slice.length() - adjust); return result; } else { // Create a temporary buffer for very large allocations DxvkBufferCreateInfo info; info.size = size; if constexpr (UpBuffer) { info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; info.access = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; info.stages = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; } else { info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; info.stages = VK_PIPELINE_STAGE_TRANSFER_BIT; info.access = VK_ACCESS_TRANSFER_READ_BIT; } D3D9BufferSlice result; result.slice = DxvkBufferSlice(m_dxvkDevice->createBuffer(info, memoryFlags)); result.mapPtr = result.slice.mapPtr(0); return result; } } bool D3D9DeviceEx::ShouldRecord() { return m_recorder != nullptr && !m_recorder->IsApplying(); } D3D9_VK_FORMAT_MAPPING D3D9DeviceEx::LookupFormat( D3D9Format Format) const { return m_adapter->GetFormatMapping(Format); } DxvkFormatInfo D3D9DeviceEx::UnsupportedFormatInfo( D3D9Format Format) const { return m_adapter->GetUnsupportedFormatInfo(Format); } bool D3D9DeviceEx::WaitForResource( const Rc& Resource, DWORD MapFlags) { // Wait for the any pending D3D9 command to be executed // on the CS thread so that we can determine whether the // resource is currently in use or not. // Determine access type to wait for based on map mode DxvkAccess access = (MapFlags & D3DLOCK_READONLY) ? 
DxvkAccess::Write : DxvkAccess::Read; if (!Resource->isInUse(access)) SynchronizeCsThread(); if (Resource->isInUse(access)) { if (MapFlags & D3DLOCK_DONOTWAIT) { // We don't have to wait, but misbehaving games may // still try to spin on `Map` until the resource is // idle, so we should flush pending commands FlushImplicit(FALSE); return false; } else { // Make sure pending commands using the resource get // executed on the the GPU if we have to wait for it Flush(); SynchronizeCsThread(); Resource->waitIdle(access); } } return true; } uint32_t D3D9DeviceEx::CalcImageLockOffset( uint32_t SlicePitch, uint32_t RowPitch, const DxvkFormatInfo* FormatInfo, const D3DBOX* pBox) { if (pBox == nullptr) return 0; std::array offsets = { pBox->Front, pBox->Top, pBox->Left }; uint32_t elementSize = 1; if (FormatInfo != nullptr) { elementSize = FormatInfo->elementSize; offsets[0] = offsets[0] / FormatInfo->blockSize.depth; offsets[1] = offsets[1] / FormatInfo->blockSize.height; offsets[2] = offsets[2] / FormatInfo->blockSize.width; } return offsets[0] * SlicePitch + offsets[1] * RowPitch + offsets[2] * elementSize; } HRESULT D3D9DeviceEx::LockImage( D3D9CommonTexture* pResource, UINT Face, UINT MipLevel, D3DLOCKED_BOX* pLockedBox, const D3DBOX* pBox, DWORD Flags) { D3D9DeviceLock lock = LockDevice(); UINT Subresource = pResource->CalcSubresource(Face, MipLevel); // Don't allow multiple lockings. 
if (unlikely(pResource->GetLocked(Subresource))) return D3DERR_INVALIDCALL; if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_READONLY)) == (D3DLOCK_DISCARD | D3DLOCK_READONLY))) return D3DERR_INVALIDCALL; if (unlikely(!m_d3d9Options.allowDoNotWait)) Flags &= ~D3DLOCK_DONOTWAIT; if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE))) Flags &= ~D3DLOCK_DISCARD; auto& desc = *(pResource->Desc()); bool alloced = pResource->CreateBufferSubresource(Subresource); const Rc mappedBuffer = pResource->GetBuffer(Subresource); auto formatInfo = imageFormatInfo(pResource->GetFormatMapping().FormatColor); auto subresource = pResource->GetSubresourceFromIndex( formatInfo->aspectMask, Subresource); VkExtent3D levelExtent = pResource->GetExtentMip(MipLevel); VkExtent3D blockCount = util::computeBlockCount(levelExtent, formatInfo->blockSize); const bool systemmem = desc.Pool == D3DPOOL_SYSTEMMEM; const bool managed = IsPoolManaged(desc.Pool); const bool scratch = desc.Pool == D3DPOOL_SCRATCH; bool fullResource = pBox == nullptr; if (unlikely(!fullResource)) { VkOffset3D lockOffset; VkExtent3D lockExtent; ConvertBox(*pBox, lockOffset, lockExtent); fullResource = lockOffset == VkOffset3D{ 0, 0, 0 } && lockExtent.width >= levelExtent.width && lockExtent.height >= levelExtent.height && lockExtent.depth >= levelExtent.depth; } // If we are not locking the entire image // a partial discard is meant to occur. // We can't really implement that, so just ignore discard // if we are not locking the full resource // DISCARD is also ignored for MANAGED and SYSTEMEM. // DISCARD is not ignored for non-DYNAMIC unlike what the docs say. 
if (!fullResource || desc.Pool != D3DPOOL_DEFAULT) Flags &= ~D3DLOCK_DISCARD; if (desc.Usage & D3DUSAGE_WRITEONLY) Flags &= ~D3DLOCK_READONLY; const bool readOnly = Flags & D3DLOCK_READONLY; pResource->SetReadOnlyLocked(Subresource, readOnly); bool renderable = desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL); // If we recently wrote to the texture on the gpu, // then we need to copy -> buffer // We are also always dirty if we are a render target, // a depth stencil, or auto generate mipmaps. bool wasWrittenByGPU = pResource->WasWrittenByGPU(Subresource) || renderable; pResource->SetWrittenByGPU(Subresource, false); DxvkBufferSliceHandle physSlice; if (Flags & D3DLOCK_DISCARD) { // We do not have to preserve the contents of the // buffer if the entire image gets discarded. physSlice = pResource->DiscardMapSlice(Subresource); EmitCs([ cImageBuffer = std::move(mappedBuffer), cBufferSlice = physSlice ] (DxvkContext* ctx) { ctx->invalidateBuffer(cImageBuffer, cBufferSlice); }); } else if ((managed && !m_d3d9Options.evictManagedOnUnlock) || scratch || systemmem) { // Managed and scratch resources // are meant to be able to provide readback without waiting. // We always keep a copy of them in system memory for this reason. // No need to wait as its not in use. physSlice = pResource->GetMappedSlice(Subresource); // We do not need to wait for the resource in the event the // calling app promises not to overwrite data that is in use // or is reading. Remember! This will only trigger for MANAGED resources // that cannot get affected by GPU, therefore readonly is A-OK for NOT waiting. 
const bool usesStagingBuffer = pResource->DoesStagingBufferUploads(Subresource); const bool skipWait = (scratch || managed || (systemmem && !wasWrittenByGPU)) && (usesStagingBuffer || readOnly); if (alloced) { std::memset(physSlice.mapPtr, 0, physSlice.length); } else if (!skipWait) { if (!(Flags & D3DLOCK_DONOTWAIT) && !WaitForResource(mappedBuffer, D3DLOCK_DONOTWAIT)) pResource->EnableStagingBufferUploads(Subresource); if (!WaitForResource(mappedBuffer, Flags)) return D3DERR_WASSTILLDRAWING; } } else { physSlice = mappedBuffer->getSliceHandle(); if (!alloced || wasWrittenByGPU) { if (unlikely(wasWrittenByGPU)) { Rc resourceImage = pResource->GetImage(); Rc mappedImage = resourceImage->info().sampleCount != 1 ? pResource->GetResolveImage() : std::move(resourceImage); // When using any map mode which requires the image contents // to be preserved, and if the GPU has write access to the // image, copy the current image contents into the buffer. auto subresourceLayers = vk::makeSubresourceLayers(subresource); // We need to resolve this, some games // lock MSAA render targets even though // that's entirely illegal and they explicitly // tell us that they do NOT want to lock them... 
if (resourceImage != nullptr) { EmitCs([ cMainImage = resourceImage, cResolveImage = mappedImage, cSubresource = subresourceLayers ] (DxvkContext* ctx) { VkImageResolve region; region.srcSubresource = cSubresource; region.srcOffset = VkOffset3D { 0, 0, 0 }; region.dstSubresource = cSubresource; region.dstOffset = VkOffset3D { 0, 0, 0 }; region.extent = cMainImage->mipLevelExtent(cSubresource.mipLevel); if (cSubresource.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { ctx->resolveImage( cResolveImage, cMainImage, region, cMainImage->info().format); } else { ctx->resolveDepthStencilImage( cResolveImage, cMainImage, region, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR); } }); } VkFormat packedFormat = GetPackedDepthStencilFormat(desc.Format); EmitCs([ cImageBuffer = mappedBuffer, cImage = std::move(mappedImage), cSubresources = subresourceLayers, cLevelExtent = levelExtent, cPackedFormat = packedFormat ] (DxvkContext* ctx) { if (cSubresources.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { ctx->copyImageToBuffer(cImageBuffer, 0, 4, 0, cImage, cSubresources, VkOffset3D { 0, 0, 0 }, cLevelExtent); } else { // Copying DS to a packed buffer is only supported for D24S8 and D32S8 // right now so the 4 byte row alignment is guaranteed by the format size ctx->copyDepthStencilImageToPackedBuffer( cImageBuffer, 0, VkOffset2D { 0, 0 }, VkExtent2D { cLevelExtent.width, cLevelExtent.height }, cImage, cSubresources, VkOffset2D { 0, 0 }, VkExtent2D { cLevelExtent.width, cLevelExtent.height }, cPackedFormat); } }); } if (!WaitForResource(mappedBuffer, Flags)) return D3DERR_WASSTILLDRAWING; } else { // If we are a new alloc, and we weren't written by the GPU // that means that we are a newly initialized // texture, and hence can just memset -> 0 and // avoid a wait here. 
std::memset(physSlice.mapPtr, 0, physSlice.length); } } const bool atiHack = desc.Format == D3D9Format::ATI1 || desc.Format == D3D9Format::ATI2; // Set up map pointer. if (atiHack) { // We need to lie here. The game is expected to use this info and do a workaround. // It's stupid. I know. pLockedBox->RowPitch = align(std::max(desc.Width >> MipLevel, 1u), 4); pLockedBox->SlicePitch = pLockedBox->RowPitch * std::max(desc.Height >> MipLevel, 1u); } else { // Data is tightly packed within the mapped buffer. pLockedBox->RowPitch = align(formatInfo->elementSize * blockCount.width, 4); pLockedBox->SlicePitch = pLockedBox->RowPitch * blockCount.height; } pResource->SetLocked(Subresource, true); const bool noDirtyUpdate = Flags & D3DLOCK_NO_DIRTY_UPDATE; if (likely((pResource->IsManaged() && m_d3d9Options.evictManagedOnUnlock) || ((desc.Pool == D3DPOOL_DEFAULT || !noDirtyUpdate) && !readOnly))) { if (pBox && MipLevel != 0) { D3DBOX scaledBox = *pBox; scaledBox.Left <<= MipLevel; scaledBox.Right = std::min(scaledBox.Right << MipLevel, pResource->Desc()->Width); scaledBox.Top <<= MipLevel; scaledBox.Bottom = std::min(scaledBox.Bottom << MipLevel, pResource->Desc()->Height); scaledBox.Back <<= MipLevel; scaledBox.Front = std::min(scaledBox.Front << MipLevel, pResource->Desc()->Depth); pResource->AddDirtyBox(&scaledBox, Face); } else { pResource->AddDirtyBox(pBox, Face); } } if (managed && !m_d3d9Options.evictManagedOnUnlock && !readOnly) { pResource->SetNeedsUpload(Subresource, true); for (uint32_t i : bit::BitMask(m_activeTextures)) { // Guaranteed to not be nullptr... auto texInfo = GetCommonTexture(m_state.textures[i]); if (texInfo == pResource) { m_activeTexturesToUpload |= 1 << i; // We can early out here, no need to add another index for this. break; } } } const uint32_t offset = CalcImageLockOffset( pLockedBox->SlicePitch, pLockedBox->RowPitch, (!atiHack) ? 
formatInfo : nullptr, pBox); uint8_t* data = reinterpret_cast(physSlice.mapPtr); data += offset; pLockedBox->pBits = data; return D3D_OK; } HRESULT D3D9DeviceEx::UnlockImage( D3D9CommonTexture* pResource, UINT Face, UINT MipLevel) { D3D9DeviceLock lock = LockDevice(); UINT Subresource = pResource->CalcSubresource(Face, MipLevel); // We weren't locked anyway! if (unlikely(!pResource->GetLocked(Subresource))) return D3D_OK; pResource->SetLocked(Subresource, false); // Flush image contents from staging if we aren't read only // and we aren't deferring for managed. const D3DBOX& box = pResource->GetDirtyBox(Face); bool shouldFlush = pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED; shouldFlush &= box.Left < box.Right && box.Top < box.Bottom && box.Front < box.Back; shouldFlush &= !pResource->IsManaged() || m_d3d9Options.evictManagedOnUnlock; if (shouldFlush) { this->FlushImage(pResource, Subresource); if (!pResource->IsAnySubresourceLocked()) pResource->ClearDirtyBoxes(); } // Toss our staging buffer if we're not dynamic // and we aren't managed (for sysmem copy.) 
bool shouldToss = pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED; shouldToss &= !pResource->IsDynamic(); shouldToss &= !pResource->IsManaged() || m_d3d9Options.evictManagedOnUnlock; if (shouldToss) { pResource->DestroyBufferSubresource(Subresource); pResource->SetWrittenByGPU(Subresource, true); } return D3D_OK; } HRESULT D3D9DeviceEx::FlushImage( D3D9CommonTexture* pResource, UINT Subresource) { const Rc image = pResource->GetImage(); // Now that data has been written into the buffer, // we need to copy its contents into the image const DxvkBufferSliceHandle srcSlice = pResource->GetMappedSlice(Subresource); auto formatInfo = imageFormatInfo(image->info().format); auto subresource = pResource->GetSubresourceFromIndex( formatInfo->aspectMask, Subresource); VkImageSubresourceLayers subresourceLayers = { subresource.aspectMask, subresource.mipLevel, subresource.arrayLayer, 1 }; auto convertFormat = pResource->GetFormatMapping().ConversionFormatInfo; if (likely(convertFormat.FormatType == D3D9ConversionFormat_None)) { VkImageSubresourceLayers dstLayers = { VK_IMAGE_ASPECT_COLOR_BIT, subresource.mipLevel, subresource.arrayLayer, 1 }; const D3DBOX& box = pResource->GetDirtyBox(subresource.arrayLayer); VkOffset3D scaledBoxOffset = { int32_t(alignDown(box.Left >> subresource.mipLevel, formatInfo->blockSize.width)), int32_t(alignDown(box.Top >> subresource.mipLevel, formatInfo->blockSize.height)), int32_t(alignDown(box.Front >> subresource.mipLevel, formatInfo->blockSize.depth)) }; VkExtent3D scaledBoxExtent = util::computeMipLevelExtent({ uint32_t(box.Right - int32_t(alignDown(box.Left, formatInfo->blockSize.width))), uint32_t(box.Bottom - int32_t(alignDown(box.Top, formatInfo->blockSize.height))), uint32_t(box.Back - int32_t(alignDown(box.Front, formatInfo->blockSize.depth))) }, subresource.mipLevel); VkExtent3D scaledBoxExtentBlockCount = util::computeBlockCount(scaledBoxExtent, formatInfo->blockSize); VkExtent3D scaledAlignedBoxExtent = 
util::computeBlockExtent(scaledBoxExtentBlockCount, formatInfo->blockSize); VkExtent3D texLevelExtent = image->mipLevelExtent(subresource.mipLevel); VkExtent3D texLevelExtentBlockCount = util::computeBlockCount(texLevelExtent, formatInfo->blockSize); scaledAlignedBoxExtent.width = std::min(texLevelExtent.width - scaledBoxOffset.x, scaledAlignedBoxExtent.width); scaledAlignedBoxExtent.height = std::min(texLevelExtent.height - scaledBoxOffset.y, scaledAlignedBoxExtent.height); scaledAlignedBoxExtent.depth = std::min(texLevelExtent.depth - scaledBoxOffset.z, scaledAlignedBoxExtent.depth); VkOffset3D boxOffsetBlockCount = util::computeBlockOffset(scaledBoxOffset, formatInfo->blockSize); VkDeviceSize copySrcOffset = (boxOffsetBlockCount.z * texLevelExtentBlockCount.height * texLevelExtentBlockCount.width + boxOffsetBlockCount.y * texLevelExtentBlockCount.width + boxOffsetBlockCount.x) * formatInfo->elementSize; VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4); VkDeviceSize rowAlignment = 0; DxvkBufferSlice copySrcSlice; if (pResource->DoesStagingBufferUploads(Subresource)) { VkDeviceSize dirtySize = scaledBoxExtentBlockCount.width * scaledBoxExtentBlockCount.height * scaledBoxExtentBlockCount.depth * formatInfo->elementSize; D3D9BufferSlice slice = AllocTempBuffer(dirtySize); copySrcSlice = slice.slice; void* srcData = reinterpret_cast(srcSlice.mapPtr) + copySrcOffset; util::packImageData( slice.mapPtr, srcData, scaledBoxExtentBlockCount, formatInfo->elementSize, pitch, pitch * texLevelExtentBlockCount.height); } else { copySrcSlice = DxvkBufferSlice(pResource->GetBuffer(Subresource), copySrcOffset, srcSlice.length); rowAlignment = pitch; // row alignment can act as the pitch parameter } EmitCs([ cSrcSlice = std::move(copySrcSlice), cDstImage = image, cDstLayers = dstLayers, cDstLevelExtent = scaledAlignedBoxExtent, cOffset = scaledBoxOffset, cRowAlignment = rowAlignment ] (DxvkContext* ctx) { ctx->copyBufferToImage( cDstImage, 
cDstLayers, cOffset, cDstLevelExtent, cSrcSlice.buffer(), cSrcSlice.offset(), cRowAlignment, 0); }); } else { const DxvkFormatInfo* formatInfo = imageFormatInfo(pResource->GetFormatMapping().FormatColor); VkExtent3D texLevelExtent = image->mipLevelExtent(subresource.mipLevel); VkExtent3D texLevelExtentBlockCount = util::computeBlockCount(texLevelExtent, formatInfo->blockSize); // Add more blocks for the other planes that we might have. // TODO: PLEASE CLEAN ME texLevelExtentBlockCount.height *= std::min(convertFormat.PlaneCount, 2u); // the converter can not handle the 4 aligned pitch so we always repack into a staging buffer D3D9BufferSlice slice = AllocTempBuffer(srcSlice.length); VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4); util::packImageData( slice.mapPtr, srcSlice.mapPtr, texLevelExtentBlockCount, formatInfo->elementSize, pitch, std::min(convertFormat.PlaneCount, 2u) * pitch * texLevelExtentBlockCount.height); Flush(); SynchronizeCsThread(); m_converter->ConvertFormat( convertFormat, image, subresourceLayers, slice.slice); } if (pResource->IsAutomaticMip()) MarkTextureMipsDirty(pResource); return D3D_OK; } void D3D9DeviceEx::EmitGenerateMips( D3D9CommonTexture* pResource) { if (pResource->IsManaged()) UploadManagedTexture(pResource); EmitCs([ cImageView = pResource->GetSampleView(false), cFilter = pResource->GetMipFilter() ] (DxvkContext* ctx) { ctx->generateMipmaps(cImageView, DecodeFilter(cFilter)); }); } HRESULT D3D9DeviceEx::LockBuffer( D3D9CommonBuffer* pResource, UINT OffsetToLock, UINT SizeToLock, void** ppbData, DWORD Flags) { D3D9DeviceLock lock = LockDevice(); if (unlikely(ppbData == nullptr)) return D3DERR_INVALIDCALL; if (!m_d3d9Options.allowDiscard) Flags &= ~D3DLOCK_DISCARD; auto& desc = *pResource->Desc(); // Ignore DISCARD if NOOVERWRITE is set if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE))) Flags &= ~D3DLOCK_DISCARD; // Ignore DISCARD and 
NOOVERWRITE if the buffer is not DEFAULT pool (tests + Halo 2) // The docs say DISCARD and NOOVERWRITE are ignored if the buffer is not DYNAMIC // but tests say otherwise! if (desc.Pool != D3DPOOL_DEFAULT) Flags &= ~(D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE); // Ignore DONOTWAIT if we are DYNAMIC // Yes... D3D9 is a good API. if (desc.Usage & D3DUSAGE_DYNAMIC) Flags &= ~D3DLOCK_DONOTWAIT; // We only bounds check for MANAGED. // (TODO: Apparently this is meant to happen for DYNAMIC too but I am not sure // how that works given it is meant to be a DIRECT access..?) const bool respectUserBounds = !(Flags & D3DLOCK_DISCARD) && SizeToLock != 0; // If we don't respect the bounds, encompass it all in our tests/checks // These values may be out of range and don't get clamped. uint32_t offset = respectUserBounds ? OffsetToLock : 0; uint32_t size = respectUserBounds ? std::min(SizeToLock, desc.Size - offset) : desc.Size; D3D9Range lockRange = D3D9Range(offset, offset + size); if ((desc.Pool == D3DPOOL_DEFAULT || !(Flags & D3DLOCK_NO_DIRTY_UPDATE)) && !(Flags & D3DLOCK_READONLY)) pResource->DirtyRange().Conjoin(lockRange); Rc mappingBuffer = pResource->GetBuffer(); DxvkBufferSliceHandle physSlice; if (Flags & D3DLOCK_DISCARD) { // Allocate a new backing slice for the buffer and set // it as the 'new' mapped slice. This assumes that the // only way to invalidate a buffer is by mapping it. physSlice = pResource->DiscardMapSlice(); EmitCs([ cBuffer = std::move(mappingBuffer), cBufferSlice = physSlice ] (DxvkContext* ctx) { ctx->invalidateBuffer(cBuffer, cBufferSlice); }); pResource->SetWrittenByGPU(false); pResource->GPUReadingRange().Clear(); } else { // Use map pointer from previous map operation. This // way we don't have to synchronize with the CS thread // if the map mode is D3DLOCK_NOOVERWRITE. physSlice = pResource->GetMappedSlice(); // NOOVERWRITE promises that they will not write in a currently used area. // Therefore we can skip waiting for these two cases. 
// We can also skip waiting if there is not dirty range overlap, if we are one of those resources. // If we are respecting the bounds ie. (MANAGED) we can test overlap // of our bounds, otherwise we just ignore this and go for it all the time. const bool wasWrittenByGPU = pResource->WasWrittenByGPU(); const bool readOnly = Flags & D3DLOCK_READONLY; const bool noOverlap = !pResource->GPUReadingRange().Overlaps(lockRange); const bool noOverwrite = Flags & D3DLOCK_NOOVERWRITE; const bool usesStagingBuffer = pResource->DoesStagingBufferUploads(); const bool directMapping = pResource->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_DIRECT; const bool skipWait = (!wasWrittenByGPU && (usesStagingBuffer || readOnly || (noOverlap && !directMapping))) || noOverwrite; if (!skipWait) { if (!(Flags & D3DLOCK_DONOTWAIT) && !WaitForResource(mappingBuffer, D3DLOCK_DONOTWAIT)) pResource->EnableStagingBufferUploads(); if (!WaitForResource(mappingBuffer, Flags)) return D3DERR_WASSTILLDRAWING; pResource->SetWrittenByGPU(false); pResource->GPUReadingRange().Clear(); } } uint8_t* data = reinterpret_cast(physSlice.mapPtr); // The offset/size is not clamped to or affected by the desc size. data += OffsetToLock; *ppbData = reinterpret_cast(data); DWORD oldFlags = pResource->GetMapFlags(); // We need to remove the READONLY flags from the map flags // if there was ever a non-readonly upload. 
if (!(Flags & D3DLOCK_READONLY)) oldFlags &= ~D3DLOCK_READONLY; pResource->SetMapFlags(Flags | oldFlags); pResource->IncrementLockCount(); return D3D_OK; } HRESULT D3D9DeviceEx::FlushBuffer( D3D9CommonBuffer* pResource) { auto dstBuffer = pResource->GetBufferSlice(); auto srcSlice = pResource->GetMappedSlice(); D3D9Range& range = pResource->DirtyRange(); DxvkBufferSlice copySrcSlice; if (pResource->DoesStagingBufferUploads()) { D3D9BufferSlice slice = AllocTempBuffer(range.max - range.min); copySrcSlice = slice.slice; void* srcData = reinterpret_cast(srcSlice.mapPtr) + range.min; memcpy(slice.mapPtr, srcData, range.max - range.min); } else { copySrcSlice = DxvkBufferSlice(pResource->GetBuffer(), range.min, range.max - range.min); } EmitCs([ cDstSlice = dstBuffer, cSrcSlice = copySrcSlice, cDstOffset = range.min, cLength = range.max - range.min ] (DxvkContext* ctx) { ctx->copyBuffer( cDstSlice.buffer(), cDstSlice.offset() + cDstOffset, cSrcSlice.buffer(), cSrcSlice.offset(), cLength); }); pResource->GPUReadingRange().Conjoin(pResource->DirtyRange()); pResource->DirtyRange().Clear(); return D3D_OK; } HRESULT D3D9DeviceEx::UnlockBuffer( D3D9CommonBuffer* pResource) { D3D9DeviceLock lock = LockDevice(); if (pResource->DecrementLockCount() != 0) return D3D_OK; if (pResource->GetMapMode() != D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) return D3D_OK; if (pResource->DirtyRange().IsDegenerate()) return D3D_OK; pResource->SetMapFlags(0); if (pResource->Desc()->Pool != D3DPOOL_DEFAULT) return D3D_OK; FlushImplicit(FALSE); FlushBuffer(pResource); return D3D_OK; } void D3D9DeviceEx::EmitCsChunk(DxvkCsChunkRef&& chunk) { m_csThread.dispatchChunk(std::move(chunk)); m_csIsBusy = true; } void D3D9DeviceEx::FlushImplicit(BOOL StrongHint) { // Flush only if the GPU is about to go idle, in // order to keep the number of submissions low. 
uint32_t pending = m_dxvkDevice->pendingSubmissions(); if (StrongHint || pending <= MaxPendingSubmits) { auto now = dxvk::high_resolution_clock::now(); uint32_t delay = MinFlushIntervalUs + IncFlushIntervalUs * pending; // Prevent flushing too often in short intervals. if (now - m_lastFlush >= std::chrono::microseconds(delay)) Flush(); } } void D3D9DeviceEx::SynchronizeCsThread() { D3D9DeviceLock lock = LockDevice(); // Dispatch current chunk so that all commands // recorded prior to this function will be run FlushCsChunk(); if (m_csThread.isBusy()) m_csThread.synchronize(); } void D3D9DeviceEx::SetupFPU() { // Should match d3d9 float behaviour. #if defined(_MSC_VER) // For MSVC we can use these cross arch and platform funcs to set the FPU. // This will work on any platform, x86, x64, ARM, etc. // Clear exceptions. _clearfp(); // Disable exceptions _controlfp(_MCW_EM, _MCW_EM); #ifndef _WIN64 // Use 24 bit precision _controlfp(_PC_24, _MCW_PC); #endif // Round to nearest _controlfp(_RC_NEAR, _MCW_RC); #elif (defined(__GNUC__) || defined(__MINGW32__)) && (defined(__i386__) || defined(__x86_64__) || defined(__ia64)) // For GCC/MinGW we can use inline asm to set it. // This only works for x86 and x64 processors however. uint16_t control; // Get current control word. __asm__ __volatile__("fnstcw %0" : "=m" (*&control)); // Clear existing settings. control &= 0xF0C0; // Disable exceptions // Use 24 bit precision // Round to nearest control |= 0x003F; // Set new control word. 
__asm__ __volatile__("fldcw %0" : : "m" (*&control)); #else Logger::warn("D3D9DeviceEx::SetupFPU: not supported on this arch."); #endif } int64_t D3D9DeviceEx::DetermineInitialTextureMemory() { auto memoryProp = m_adapter->GetDXVKAdapter()->memoryProperties(); VkDeviceSize availableTextureMemory = 0; for (uint32_t i = 0; i < memoryProp.memoryHeapCount; i++) availableTextureMemory += memoryProp.memoryHeaps[i].size; constexpr VkDeviceSize Megabytes = 1024 * 1024; // The value returned is a 32-bit value, so we need to clamp it. VkDeviceSize maxMemory = (VkDeviceSize(m_d3d9Options.maxAvailableMemory) * Megabytes) - 1; availableTextureMemory = std::min(availableTextureMemory, maxMemory); return int64_t(availableTextureMemory); } Rc D3D9DeviceEx::CreateConstantBuffer( bool SSBO, VkDeviceSize Size, DxsoProgramType ShaderStage, DxsoConstantBuffers BufferType) { DxvkBufferCreateInfo info = { }; info.usage = SSBO ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; info.access = SSBO ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_UNIFORM_READ_BIT; info.size = Size; info.stages = ShaderStage == DxsoProgramType::VertexShader ? 
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; VkMemoryPropertyFlags memoryFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; if (m_d3d9Options.deviceLocalConstantBuffers) memoryFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; Rc buffer = m_dxvkDevice->createBuffer(info, memoryFlags); const uint32_t slotId = computeResourceSlotId( ShaderStage, DxsoBindingType::ConstantBuffer, BufferType); EmitCs([ cSlotId = slotId, cBuffer = buffer ] (DxvkContext* ctx) { ctx->bindResourceBuffer(cSlotId, DxvkBufferSlice(cBuffer, 0, cBuffer->info().size)); }); return buffer; } void D3D9DeviceEx::CreateConstantBuffers() { m_consts[DxsoProgramTypes::VertexShader].buffer = CreateConstantBuffer(m_dxsoOptions.vertexConstantBufferAsSSBO, m_vsLayout.totalSize(), DxsoProgramType::VertexShader, DxsoConstantBuffers::VSConstantBuffer); m_consts[DxsoProgramTypes::PixelShader].buffer = CreateConstantBuffer(false, m_psLayout.totalSize(), DxsoProgramType::PixelShader, DxsoConstantBuffers::PSConstantBuffer); m_vsClipPlanes = CreateConstantBuffer(false, caps::MaxClipPlanes * sizeof(D3D9ClipPlane), DxsoProgramType::VertexShader, DxsoConstantBuffers::VSClipPlanes); m_vsFixedFunction = CreateConstantBuffer(false, sizeof(D3D9FixedFunctionVS), DxsoProgramType::VertexShader, DxsoConstantBuffers::VSFixedFunction); m_psFixedFunction = CreateConstantBuffer(false, sizeof(D3D9FixedFunctionPS), DxsoProgramType::PixelShader, DxsoConstantBuffers::PSFixedFunction); m_psShared = CreateConstantBuffer(false, sizeof(D3D9SharedPS), DxsoProgramType::PixelShader, DxsoConstantBuffers::PSShared); m_vsVertexBlend = CreateConstantBuffer(true, CanSWVP() ? 
sizeof(D3D9FixedFunctionVertexBlendDataSW) : sizeof(D3D9FixedFunctionVertexBlendDataHW), DxsoProgramType::VertexShader, DxsoConstantBuffers::VSVertexBlendData); } template inline void D3D9DeviceEx::UploadHardwareConstantSet(void* pData, const SoftwareLayoutType& Src, const ShaderType& Shader) { const D3D9ConstantSets& constSet = m_consts[ShaderStage]; auto* dst = reinterpret_cast(pData); if (constSet.meta.maxConstIndexF) std::memcpy(dst->fConsts, Src.fConsts, constSet.meta.maxConstIndexF * sizeof(Vector4)); if (constSet.meta.maxConstIndexI) std::memcpy(dst->iConsts, Src.iConsts, constSet.meta.maxConstIndexI * sizeof(Vector4i)); } template inline void D3D9DeviceEx::UploadSoftwareConstantSet(void* pData, const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) { const D3D9ConstantSets& constSet = m_consts[DxsoProgramType::VertexShader]; auto dst = reinterpret_cast(pData); if (constSet.meta.maxConstIndexF) std::memcpy(dst + Layout.floatOffset(), Src.fConsts, constSet.meta.maxConstIndexF * sizeof(Vector4)); if (constSet.meta.maxConstIndexI) std::memcpy(dst + Layout.intOffset(), Src.iConsts, constSet.meta.maxConstIndexI * sizeof(Vector4i)); if (constSet.meta.maxConstIndexB) std::memcpy(dst + Layout.bitmaskOffset(), Src.bConsts, Layout.bitmaskSize()); } template inline void D3D9DeviceEx::UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) { D3D9ConstantSets& constSet = m_consts[ShaderStage]; if (!constSet.dirty) return; constSet.dirty = false; DxvkBufferSliceHandle slice = constSet.buffer->allocSlice(); EmitCs([ cBuffer = constSet.buffer, cSlice = slice ] (DxvkContext* ctx) { ctx->invalidateBuffer(cBuffer, cSlice); }); if constexpr (ShaderStage == DxsoProgramType::PixelShader) UploadHardwareConstantSet(slice.mapPtr, Src, Shader); else if (likely(!CanSWVP())) UploadHardwareConstantSet(slice.mapPtr, Src, Shader); else UploadSoftwareConstantSet(slice.mapPtr, Src, Layout, Shader); if 
(constSet.meta.needsConstantCopies) { Vector4* data = reinterpret_cast(slice.mapPtr); auto& shaderConsts = GetCommonShader(Shader)->GetConstants(); for (const auto& constant : shaderConsts) data[constant.uboIdx] = *reinterpret_cast(constant.float32); } } template void D3D9DeviceEx::UploadConstants() { if constexpr (ShaderStage == DxsoProgramTypes::VertexShader) return UploadConstantSet(m_state.vsConsts, m_vsLayout, m_state.vertexShader); else return UploadConstantSet (m_state.psConsts, m_psLayout, m_state.pixelShader); } void D3D9DeviceEx::UpdateClipPlanes() { m_flags.clr(D3D9DeviceFlag::DirtyClipPlanes); auto slice = m_vsClipPlanes->allocSlice(); auto dst = reinterpret_cast(slice.mapPtr); for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) { dst[i] = (m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1 << i)) ? m_state.clipPlanes[i] : D3D9ClipPlane(); } EmitCs([ cBuffer = m_vsClipPlanes, cSlice = slice ] (DxvkContext* ctx) { ctx->invalidateBuffer(cBuffer, cSlice); }); } template void D3D9DeviceEx::UpdatePushConstant(const void* pData) { struct ConstantData { uint8_t Data[Length]; }; auto* constData = reinterpret_cast(pData); EmitCs([ cData = *constData ](DxvkContext* ctx) { ctx->pushConstants(Offset, Length, &cData); }); } template void D3D9DeviceEx::UpdatePushConstant() { auto& rs = m_state.renderStates; if constexpr (Item == D3D9RenderStateItem::AlphaRef) { float alpha = float(rs[D3DRS_ALPHAREF] & 0xFF) / 255.0f; UpdatePushConstant(&alpha); } else if constexpr (Item == D3D9RenderStateItem::FogColor) { Vector4 color; DecodeD3DCOLOR(D3DCOLOR(rs[D3DRS_FOGCOLOR]), color.data); UpdatePushConstant(&color); } else if constexpr (Item == D3D9RenderStateItem::FogDensity) { float density = bit::cast(rs[D3DRS_FOGDENSITY]); UpdatePushConstant(&density); } else if constexpr (Item == D3D9RenderStateItem::FogEnd) { float end = bit::cast(rs[D3DRS_FOGEND]); UpdatePushConstant(&end); } else if constexpr (Item == D3D9RenderStateItem::FogScale) { float end = bit::cast(rs[D3DRS_FOGEND]); 
float start = bit::cast(rs[D3DRS_FOGSTART]); float scale = 1.0f / (end - start); /* NOTE(review): no guard for end == start, which divides by zero here. */ UpdatePushConstant(&scale); } else if constexpr (Item == D3D9RenderStateItem::PointSize) { UpdatePushConstant(&rs[D3DRS_POINTSIZE]); } else if constexpr (Item == D3D9RenderStateItem::PointSizeMin) { UpdatePushConstant(&rs[D3DRS_POINTSIZE_MIN]); } else if constexpr (Item == D3D9RenderStateItem::PointSizeMax) { UpdatePushConstant(&rs[D3DRS_POINTSIZE_MAX]); } else if constexpr (Item == D3D9RenderStateItem::PointScaleA) { /* The A, B and C point scale factors are each divided by the squared viewport height. */ float scale = bit::cast(rs[D3DRS_POINTSCALE_A]); scale /= float(m_state.viewport.Height * m_state.viewport.Height); UpdatePushConstant(&scale); } else if constexpr (Item == D3D9RenderStateItem::PointScaleB) { float scale = bit::cast(rs[D3DRS_POINTSCALE_B]); scale /= float(m_state.viewport.Height * m_state.viewport.Height); UpdatePushConstant(&scale); } else if constexpr (Item == D3D9RenderStateItem::PointScaleC) { float scale = bit::cast(rs[D3DRS_POINTSCALE_C]); scale /= float(m_state.viewport.Height * m_state.viewport.Height); UpdatePushConstant(&scale); } else Logger::warn("D3D9: Invalid push constant set to update."); } /* Takes the device lock and flushes the initializer and the format converter. If m_csIsBusy is set or the current CS chunk is not empty, it also emits a flushCommandList command, calls FlushCsChunk(), resets m_lastFlush to the current time and clears m_csIsBusy. */ void D3D9DeviceEx::Flush() { D3D9DeviceLock lock = LockDevice(); m_initializer->Flush(); m_converter->Flush(); if (m_csIsBusy || !m_csChunk->empty()) { // Add commands to flush the threaded // context, then flush the command list EmitCs([](DxvkContext* ctx) { ctx->flushCommandList(); }); FlushCsChunk(); // Reset flush timer used for implicit flushes m_lastFlush = dxvk::high_resolution_clock::now(); m_csIsBusy = false; } } /* Recomputes bit `index` of m_boundRTs: set only when the render target at that index is non-null and its IsNull() returns false. */ inline void D3D9DeviceEx::UpdateBoundRTs(uint32_t index) { const uint32_t bit = 1 << index; m_boundRTs &= ~bit; if (m_state.renderTargets[index] != nullptr && !m_state.renderTargets[index]->IsNull()) m_boundRTs |= bit; } /* Recomputes bit `index` of m_activeRTs: set when that render target is flagged in m_boundRTs, has a base texture and its color write render state is non-zero; afterwards refreshes the render target hazards for that bit. */ inline void D3D9DeviceEx::UpdateActiveRTs(uint32_t index) { const uint32_t bit = 1 << index; m_activeRTs &= ~bit; if ((m_boundRTs & bit) != 0 && m_state.renderTargets[index]->GetBaseTexture() != nullptr && 
m_state.renderStates[ColorWriteIndex(index)]) m_activeRTs |= bit; UpdateActiveHazardsRT(bit); } /* Recomputes the per-sampler tracking bits for sampler `index` from the texture currently stored in m_state.textures[index]: active, render-target, depth-stencil, needs-upload and needs-mip-generation. All five bits are cleared first and only set again for a non-null texture. Afterwards it refreshes the hazards of all render targets when combinedUsage contains D3DUSAGE_RENDERTARGET, and the depth-stencil hazard of this sampler when it contains D3DUSAGE_DEPTHSTENCIL. */ inline void D3D9DeviceEx::UpdateActiveTextures(uint32_t index, DWORD combinedUsage) { const uint32_t bit = 1 << index; m_activeRTTextures &= ~bit; m_activeDSTextures &= ~bit; m_activeTextures &= ~bit; m_activeTexturesToUpload &= ~bit; m_activeTexturesToGen &= ~bit; auto tex = GetCommonTexture(m_state.textures[index]); if (tex != nullptr) { m_activeTextures |= bit; if (unlikely(tex->IsRenderTarget())) m_activeRTTextures |= bit; if (unlikely(tex->IsDepthStencil())) m_activeDSTextures |= bit; if (unlikely(tex->NeedsAnyUpload())) m_activeTexturesToUpload |= bit; if (unlikely(tex->NeedsMipGen())) m_activeTexturesToGen |= bit; } if (unlikely(combinedUsage & D3DUSAGE_RENDERTARGET)) UpdateActiveHazardsRT(UINT32_MAX); if (unlikely(combinedUsage & D3DUSAGE_DEPTHSTENCIL)) UpdateActiveHazardsDS(bit); } /* Rebuilds the bits of m_activeHazardsRT selected by rtMask. Candidates are the active render targets and the render-target-backed sampler textures, both restricted by m_psShaderMasks. A render target bit is set when the surface is mip level 0 of the same base texture that is bound to one of those samplers. */ inline void D3D9DeviceEx::UpdateActiveHazardsRT(uint32_t rtMask) { auto masks = m_psShaderMasks; masks.rtMask &= m_activeRTs & rtMask; masks.samplerMask &= m_activeRTTextures; m_activeHazardsRT = m_activeHazardsRT & (~rtMask); for (uint32_t rtIdx : bit::BitMask(masks.rtMask)) { for (uint32_t samplerIdx : bit::BitMask(masks.samplerMask)) { D3D9Surface* rtSurf = m_state.renderTargets[rtIdx].ptr(); IDirect3DBaseTexture9* rtBase = rtSurf->GetBaseTexture(); IDirect3DBaseTexture9* texBase = m_state.textures[samplerIdx]; // HACK: Don't mark for hazards if we aren't rendering to mip 0! // Some games use screenspace passes like this for blurring // Sampling from mip 0 (texture) -> mip 1 (rt) // and we'd trigger the hazard path otherwise which is unnecessary, // and would shove us into GENERAL and emitting readback barriers. 
if (likely(rtSurf->GetMipLevel() != 0 || rtBase != texBase)) continue; m_activeHazardsRT |= 1 << rtIdx; } } } /* Rebuilds the bits of m_activeHazardsDS selected by texMask: a sampler bit is set when the texture bound to that sampler is the base texture of the current depth-stencil surface. Nothing is set when there is no depth-stencil surface or it has no base texture. */ inline void D3D9DeviceEx::UpdateActiveHazardsDS(uint32_t texMask) { m_activeHazardsDS = m_activeHazardsDS & (~texMask); if (m_state.depthStencil != nullptr && m_state.depthStencil->GetBaseTexture() != nullptr) { uint32_t samplerMask = m_activeDSTextures & texMask; for (uint32_t samplerIdx : bit::BitMask(samplerMask)) { IDirect3DBaseTexture9* dsBase = m_state.depthStencil->GetBaseTexture(); IDirect3DBaseTexture9* texBase = m_state.textures[samplerIdx]; if (likely(dsBase != texBase)) continue; m_activeHazardsDS |= 1 << samplerIdx; } } } /* For every render target flagged in m_activeHazardsRT: when MarkHazardous() on its common texture returns false, transitions the image to VK_IMAGE_LAYOUT_GENERAL and marks the framebuffer dirty. */ void D3D9DeviceEx::MarkRenderHazards() { for (uint32_t rtIdx : bit::BitMask(m_activeHazardsRT)) { // Guaranteed to not be nullptr... auto tex = m_state.renderTargets[rtIdx]->GetCommonTexture(); if (unlikely(!tex->MarkHazardous())) { TransitionImage(tex, VK_IMAGE_LAYOUT_GENERAL); m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); } } } /* Calls FlushImage for each subresource of pResource that needs an upload and has a buffer, then clears the resource's dirty boxes and its needs-upload state. */ void D3D9DeviceEx::UploadManagedTexture(D3D9CommonTexture* pResource) { for (uint32_t subresource = 0; subresource < pResource->CountSubresources(); subresource++) { if (!pResource->NeedsUpload(subresource) || pResource->GetBuffer(subresource) == nullptr) continue; this->FlushImage(pResource, subresource); } pResource->ClearDirtyBoxes(); pResource->ClearNeedsUpload(); } /* Uploads the texture bound at every sampler index set in mask and clears those bits from m_activeTexturesToUpload. */ void D3D9DeviceEx::UploadManagedTextures(uint32_t mask) { // Guaranteed to not be nullptr... for (uint32_t texIdx : bit::BitMask(mask)) UploadManagedTexture(GetCommonTexture(m_state.textures[texIdx])); m_activeTexturesToUpload &= ~mask; } /* For each sampler index set in mask, emits mip generation for the bound texture if it still reports NeedsMipGen(), then clears the mask bits from m_activeTexturesToGen. */ void D3D9DeviceEx::GenerateTextureMips(uint32_t mask) { for (uint32_t texIdx : bit::BitMask(mask)) { // Guaranteed to not be nullptr... 
auto texInfo = GetCommonTexture(m_state.textures[texIdx]); if (texInfo->NeedsMipGen()) { this->EmitGenerateMips(texInfo); texInfo->SetNeedsMipGen(false); } } m_activeTexturesToGen &= ~mask; } /* Flags pResource as needing mip generation, calls MarkAllWrittenByGPU() on it, and sets the to-generate bit of the first active sampler it is bound to (the loop stops at the first match). */ void D3D9DeviceEx::MarkTextureMipsDirty(D3D9CommonTexture* pResource) { pResource->SetNeedsMipGen(true); pResource->MarkAllWrittenByGPU(); for (uint32_t i : bit::BitMask(m_activeTextures)) { // Guaranteed to not be nullptr... auto texInfo = GetCommonTexture(m_state.textures[i]); if (texInfo == pResource) { m_activeTexturesToGen |= 1 << i; // We can early out here, no need to add another index for this. break; } } } /* Clears the needs-mip-generation flag on pResource and the to-generate bit of every active sampler it is bound to. */ void D3D9DeviceEx::MarkTextureMipsUnDirty(D3D9CommonTexture* pResource) { pResource->SetNeedsMipGen(false); for (uint32_t i : bit::BitMask(m_activeTextures)) { // Guaranteed to not be nullptr... auto texInfo = GetCommonTexture(m_state.textures[i]); if (texInfo == pResource) m_activeTexturesToGen &= ~(1 << i); } } /* Clears the to-upload bit of every active sampler that pResource is bound to. */ void D3D9DeviceEx::MarkTextureUploaded(D3D9CommonTexture* pResource) { for (uint32_t i : bit::BitMask(m_activeTextures)) { // Guaranteed to not be nullptr... auto texInfo = GetCommonTexture(m_state.textures[i]); if (texInfo == pResource) m_activeTexturesToUpload &= ~(1 << i); } } /* Updates the PointMode spec constant. When Points is false the mode is reset to 0 unconditionally. Otherwise bit 0 is set when point scaling is enabled and no programmable vertex shader is in use, bit 1 when point sprites are enabled; the spec constant is only re-emitted when the mode differs from m_lastPointMode. */ template void D3D9DeviceEx::UpdatePointMode() { if constexpr (!Points) { m_lastPointMode = 0; EmitCs([](DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PointMode, 0); }); } else { auto& rs = m_state.renderStates; const bool scale = rs[D3DRS_POINTSCALEENABLE] && !UseProgrammableVS(); const bool sprite = rs[D3DRS_POINTSPRITEENABLE]; const uint32_t scaleBit = scale ? 1u : 0u; const uint32_t spriteBit = sprite ? 
2u : 0u; uint32_t mode = scaleBit | spriteBit; /* The point scale push constants are refreshed only while point scaling is enabled and DirtyPointScale is set. NOTE(review): the three calls below are textually identical here; presumably their template arguments were lost in extraction - confirm against upstream. */ if (rs[D3DRS_POINTSCALEENABLE] && m_flags.test(D3D9DeviceFlag::DirtyPointScale)) { m_flags.clr(D3D9DeviceFlag::DirtyPointScale); UpdatePushConstant(); UpdatePushConstant(); UpdatePushConstant(); } if (unlikely(mode != m_lastPointMode)) { EmitCs([cMode = mode] (DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PointMode, cMode); }); m_lastPointMode = mode; } } } /* Refreshes dirty fog push constants and the fog spec constants. Pixel fog is active when D3DRS_FOGENABLE is set and D3DRS_FOGTABLEMODE is not D3DFOG_NONE; vertex fog is active only when fog is enabled, D3DRS_FOGVERTEXMODE is not D3DFOG_NONE and pixel fog is not active. The spec constants are only emitted while DirtyFogState is set. */ void D3D9DeviceEx::UpdateFog() { auto& rs = m_state.renderStates; bool fogEnabled = rs[D3DRS_FOGENABLE]; bool pixelFog = rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE && fogEnabled; bool vertexFog = rs[D3DRS_FOGVERTEXMODE] != D3DFOG_NONE && fogEnabled && !pixelFog; /* Clears each relevant dirty flag and issues one push constant update for it: DirtyFogColor for every mode, DirtyFogScale and DirtyFogEnd for linear fog, DirtyFogDensity for EXP and EXP2. */ auto UpdateFogConstants = [&](D3DFOGMODE FogMode) { if (m_flags.test(D3D9DeviceFlag::DirtyFogColor)) { m_flags.clr(D3D9DeviceFlag::DirtyFogColor); UpdatePushConstant(); } if (FogMode == D3DFOG_LINEAR) { if (m_flags.test(D3D9DeviceFlag::DirtyFogScale)) { m_flags.clr(D3D9DeviceFlag::DirtyFogScale); UpdatePushConstant(); } if (m_flags.test(D3D9DeviceFlag::DirtyFogEnd)) { m_flags.clr(D3D9DeviceFlag::DirtyFogEnd); UpdatePushConstant(); } } else if (FogMode == D3DFOG_EXP || FogMode == D3DFOG_EXP2) { if (m_flags.test(D3D9DeviceFlag::DirtyFogDensity)) { m_flags.clr(D3D9DeviceFlag::DirtyFogDensity); UpdatePushConstant(); } } }; if (vertexFog) { D3DFOGMODE mode = D3DFOGMODE(rs[D3DRS_FOGVERTEXMODE]); UpdateFogConstants(mode); if (m_flags.test(D3D9DeviceFlag::DirtyFogState)) { m_flags.clr(D3D9DeviceFlag::DirtyFogState); EmitCs([cMode = mode] (DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled, true); ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, cMode); ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, D3DFOG_NONE); }); } } else if (pixelFog) { D3DFOGMODE mode = D3DFOGMODE(rs[D3DRS_FOGTABLEMODE]); UpdateFogConstants(mode); if 
(m_flags.test(D3D9DeviceFlag::DirtyFogState)) { m_flags.clr(D3D9DeviceFlag::DirtyFogState); EmitCs([cMode = mode] (DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled, true); ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, D3DFOG_NONE); ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, cMode); }); } } else { /* Neither vertex nor pixel fog is active. With fog enabled the constants are still refreshed for mode D3DFOG_NONE (which only touches the color flag); both fog mode spec constants are set to D3DFOG_NONE and FogEnabled mirrors D3DRS_FOGENABLE. */ if (fogEnabled) UpdateFogConstants(D3DFOG_NONE); if (m_flags.test(D3D9DeviceFlag::DirtyFogState)) { m_flags.clr(D3D9DeviceFlag::DirtyFogState); EmitCs([cEnabled = fogEnabled] (DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled, cEnabled); ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, D3DFOG_NONE); ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, D3DFOG_NONE); }); } } } /* Clears DirtyFramebuffer, collects the bound render targets and the depth-stencil surface into a DxvkRenderTargets set and emits a command binding it. The render target views are requested with the current D3DRS_SRGBWRITEENABLE value. Attachments whose sample count differs from the first bound render target are left out. */ void D3D9DeviceEx::BindFramebuffer() { m_flags.clr(D3D9DeviceFlag::DirtyFramebuffer); DxvkRenderTargets attachments; bool srgb = m_state.renderStates[D3DRS_SRGBWRITEENABLE]; // D3D9 doesn't have the concept of a framebuffer object, // so we'll just create a new one every time the render // target bindings are updated. Set up the attachments. 
/* VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM serves as the "no sample count chosen yet" sentinel; the first bound render target decides the count and later targets with a different count are skipped. */ VkSampleCountFlagBits sampleCount = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM; for (uint32_t i : bit::BitMask(m_boundRTs)) { const DxvkImageCreateInfo& rtImageInfo = m_state.renderTargets[i]->GetCommonTexture()->GetImage()->info(); if (likely(sampleCount == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM)) sampleCount = rtImageInfo.sampleCount; else if (unlikely(sampleCount != rtImageInfo.sampleCount)) continue; attachments.color[i] = { m_state.renderTargets[i]->GetRenderTargetView(srgb), m_state.renderTargets[i]->GetRenderTargetLayout() }; } /* The depth-stencil surface is attached when no render target fixed a sample count or its count matches; its layout depends on D3DRS_ZWRITEENABLE and on whether any depth-stencil hazard is active. */ if (m_state.depthStencil != nullptr) { const DxvkImageCreateInfo& dsImageInfo = m_state.depthStencil->GetCommonTexture()->GetImage()->info(); const bool depthWrite = m_state.renderStates[D3DRS_ZWRITEENABLE]; if (likely(sampleCount == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM || sampleCount == dsImageInfo.sampleCount)) { attachments.depth = { m_state.depthStencil->GetDepthStencilView(), m_state.depthStencil->GetDepthStencilLayout(depthWrite, m_activeHazardsDS != 0) }; } } // Create and bind the framebuffer object to the context EmitCs([ cAttachments = std::move(attachments) ] (DxvkContext* ctx) { ctx->bindRenderTargets(cAttachments); }); } /* Clears DirtyViewportScissor and emits one Vulkan viewport plus one scissor rectangle derived from m_state.viewport. The viewport uses a negative height with its origin at Y + Height, is offset by cf on both axes, and keeps MaxZ at least zBias above MinZ. The scissor is the D3D9 scissor rectangle clamped against the viewport when D3DRS_SCISSORTESTENABLE is set, otherwise the viewport rectangle itself. */ void D3D9DeviceEx::BindViewportAndScissor() { m_flags.clr(D3D9DeviceFlag::DirtyViewportScissor); VkViewport viewport; VkRect2D scissor; // D3D9's coordinate system has its origin in the bottom left, // but the viewport coordinates are aligned to the top-left // corner so we can get away with flipping the viewport. const D3DVIEWPORT9& vp = m_state.viewport; // Correctness Factor for 1/2 texel offset // We need to bias this slightly to make // imprecision in games happy. // Originally we did this only for powers of two // resolutions but since NEAREST filtering fixed to // truncate, we need to do this all the time now. constexpr float cf = 0.5f - (1.0f / 128.0f); // How much to bias MinZ by to avoid a depth // degenerate viewport. 
constexpr float zBias = 0.001f; /* Fields in order: x, y, width, height, minDepth, maxDepth. The height is negated and y starts at Height + Y. */ viewport = VkViewport{ float(vp.X) + cf, float(vp.Height + vp.Y) + cf, float(vp.Width), -float(vp.Height), vp.MinZ, std::max(vp.MaxZ, vp.MinZ + zBias), }; // Scissor rectangles. Vulkan does not provide an easy way // to disable the scissor test, so we'll have to set scissor // rects that are at least as large as the framebuffer. bool enableScissorTest = m_state.renderStates[D3DRS_SCISSORTESTENABLE]; if (enableScissorTest) { RECT sr = m_state.scissorRect; /* srPosA is the top-left corner, raised to at least 0 and at least the viewport origin. srPosB is the bottom-right corner, raised to at least srPosA and then capped at the viewport's far edge. NOTE(review): if sr.left or sr.top lies beyond the viewport's far edge, the cap appears to put srPosB below srPosA, and the uint32_t casts of the differences would wrap - confirm callers rule this out. */ VkOffset2D srPosA; srPosA.x = std::max(0, sr.left); srPosA.x = std::max(vp.X, srPosA.x); srPosA.y = std::max(0, sr.top); srPosA.y = std::max(vp.Y, srPosA.y); VkOffset2D srPosB; srPosB.x = std::max(srPosA.x, sr.right); srPosB.x = std::min(vp.X + vp.Width, srPosB.x); srPosB.y = std::max(srPosA.y, sr.bottom); srPosB.y = std::min(vp.Y + vp.Height, srPosB.y); VkExtent2D srSize; srSize.width = uint32_t(srPosB.x - srPosA.x); srSize.height = uint32_t(srPosB.y - srPosA.y); scissor = VkRect2D{ srPosA, srSize }; } else { scissor = VkRect2D{ VkOffset2D { int32_t(vp.X), int32_t(vp.Y) }, VkExtent2D { vp.Width, vp.Height }}; } EmitCs([ cViewport = viewport, cScissor = scissor ] (DxvkContext* ctx) { ctx->setViewports( 1, &cViewport, &cScissor); }); } /* Clears DirtyMultiSampleState and emits the multisample state: the sample mask is D3DRS_MULTISAMPLEMASK while the ValidSampleMask flag is set and 0xffffffff otherwise, and alpha-to-coverage follows IsAlphaToCoverageEnabled(). */ void D3D9DeviceEx::BindMultiSampleState() { m_flags.clr(D3D9DeviceFlag::DirtyMultiSampleState); DxvkMultisampleState msState; msState.sampleMask = m_flags.test(D3D9DeviceFlag::ValidSampleMask) ? 
m_state.renderStates[D3DRS_MULTISAMPLEMASK] : 0xffffffff; msState.enableAlphaToCoverage = IsAlphaToCoverageEnabled(); EmitCs([ cState = msState ] (DxvkContext* ctx) { ctx->setMultisampleState(cState); }); } /* Clears DirtyBlendState, decodes the D3D9 blend render states into a DxvkBlendMode and emits a command applying it to render targets 0 to 3. The alpha blend state mirrors the color blend state unless D3DRS_SEPARATEALPHABLENDENABLE is set. Each target gets its own color write mask. For targets flagged in m_alphaSwizzleRTs, DST_ALPHA factors are replaced by ONE and ONE_MINUS_DST_ALPHA factors by ZERO. */ void D3D9DeviceEx::BindBlendState() { m_flags.clr(D3D9DeviceFlag::DirtyBlendState); auto& state = m_state.renderStates; bool separateAlpha = state[D3DRS_SEPARATEALPHABLENDENABLE]; DxvkBlendMode mode; mode.enableBlending = state[D3DRS_ALPHABLENDENABLE] != FALSE; D3D9BlendState color, alpha; color.Src = D3DBLEND(state[D3DRS_SRCBLEND]); color.Dst = D3DBLEND(state[D3DRS_DESTBLEND]); color.Op = D3DBLENDOP(state[D3DRS_BLENDOP]); FixupBlendState(color); if (separateAlpha) { alpha.Src = D3DBLEND(state[D3DRS_SRCBLENDALPHA]); alpha.Dst = D3DBLEND(state[D3DRS_DESTBLENDALPHA]); alpha.Op = D3DBLENDOP(state[D3DRS_BLENDOPALPHA]); FixupBlendState(alpha); } else alpha = color; mode.colorSrcFactor = DecodeBlendFactor(color.Src, false); mode.colorDstFactor = DecodeBlendFactor(color.Dst, false); mode.colorBlendOp = DecodeBlendOp (color.Op); mode.alphaSrcFactor = DecodeBlendFactor(alpha.Src, true); mode.alphaDstFactor = DecodeBlendFactor(alpha.Dst, true); mode.alphaBlendOp = DecodeBlendOp (alpha.Op); mode.writeMask = state[ColorWriteIndex(0)]; /* Write masks of render targets 1 to 3, stored at indices 0 to 2. */ std::array extraWriteMasks; for (uint32_t i = 0; i < 3; i++) extraWriteMasks[i] = state[ColorWriteIndex(i + 1)]; EmitCs([ cMode = mode, cWriteMasks = extraWriteMasks, cAlphaMasks = m_alphaSwizzleRTs ](DxvkContext* ctx) { for (uint32_t i = 0; i < 4; i++) { DxvkBlendMode mode = cMode; if (i != 0) mode.writeMask = cWriteMasks[i - 1]; const bool alphaSwizzle = cAlphaMasks & (1 << i); auto NormalizeFactor = [alphaSwizzle](VkBlendFactor Factor) { if (alphaSwizzle) { if (Factor == VK_BLEND_FACTOR_DST_ALPHA) return VK_BLEND_FACTOR_ONE; else if (Factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA) return VK_BLEND_FACTOR_ZERO; } return Factor; }; mode.colorSrcFactor = NormalizeFactor(mode.colorSrcFactor); mode.colorDstFactor = 
NormalizeFactor(mode.colorDstFactor); mode.alphaSrcFactor = NormalizeFactor(mode.alphaSrcFactor); mode.alphaDstFactor = NormalizeFactor(mode.alphaDstFactor); ctx->setBlendMode(i, mode); } }); } /* Decodes D3DRS_BLENDFACTOR into a DxvkBlendConstants value and emits a command setting it as the context's blend constants. */ void D3D9DeviceEx::BindBlendFactor() { DxvkBlendConstants blendConstants; DecodeD3DCOLOR( D3DCOLOR(m_state.renderStates[D3DRS_BLENDFACTOR]), reinterpret_cast(&blendConstants)); EmitCs([ cBlendConstants = blendConstants ](DxvkContext* ctx) { ctx->setBlendConstants(cBlendConstants); }); } /* Clears DirtyDepthStencilState and emits the depth-stencil state decoded from the render states. Front-face stencil ops are filled in only when D3DRS_STENCILENABLE is set, otherwise they are a value-initialized VkStencilOpState. Back-face ops come from the D3DRS_CCW_STENCIL states only when two-sided stencil is also enabled (reusing the front compare and write masks); otherwise the back face is a copy of the front face. The reference field is written as 0 here. */ void D3D9DeviceEx::BindDepthStencilState() { m_flags.clr(D3D9DeviceFlag::DirtyDepthStencilState); auto& rs = m_state.renderStates; bool stencil = rs[D3DRS_STENCILENABLE]; bool twoSidedStencil = stencil && rs[D3DRS_TWOSIDEDSTENCILMODE]; DxvkDepthStencilState state; state.enableDepthTest = rs[D3DRS_ZENABLE] != FALSE; state.enableDepthWrite = rs[D3DRS_ZWRITEENABLE] != FALSE; state.enableStencilTest = stencil; state.depthCompareOp = DecodeCompareOp(D3DCMPFUNC(rs[D3DRS_ZFUNC])); if (stencil) { state.stencilOpFront.failOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILFAIL])); state.stencilOpFront.passOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILPASS])); state.stencilOpFront.depthFailOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILZFAIL])); state.stencilOpFront.compareOp = DecodeCompareOp(D3DCMPFUNC (rs[D3DRS_STENCILFUNC])); state.stencilOpFront.compareMask = uint32_t(rs[D3DRS_STENCILMASK]); state.stencilOpFront.writeMask = uint32_t(rs[D3DRS_STENCILWRITEMASK]); state.stencilOpFront.reference = 0; } else state.stencilOpFront = VkStencilOpState(); if (twoSidedStencil) { state.stencilOpBack.failOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILFAIL])); state.stencilOpBack.passOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILPASS])); state.stencilOpBack.depthFailOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILZFAIL])); state.stencilOpBack.compareOp = DecodeCompareOp(D3DCMPFUNC (rs[D3DRS_CCW_STENCILFUNC])); state.stencilOpBack.compareMask = state.stencilOpFront.compareMask; 
state.stencilOpBack.writeMask = state.stencilOpFront.writeMask; state.stencilOpBack.reference = 0; } else state.stencilOpBack = state.stencilOpFront; EmitCs([ cState = state ](DxvkContext* ctx) { ctx->setDepthStencilState(cState); }); } /* Clears DirtyRasterizerState and emits the rasterizer state. Cull mode and polygon mode are decoded from D3DRS_CULLMODE and D3DRS_FILLMODE, depth bias follows IsDepthBiasEnabled(); depth clip enabled, clockwise front face, conservative rasterization disabled and sampleCount 0 are fixed values. */ void D3D9DeviceEx::BindRasterizerState() { m_flags.clr(D3D9DeviceFlag::DirtyRasterizerState); auto& rs = m_state.renderStates; DxvkRasterizerState state; state.cullMode = DecodeCullMode(D3DCULL(rs[D3DRS_CULLMODE])); state.depthBiasEnable = IsDepthBiasEnabled(); state.depthClipEnable = true; state.frontFace = VK_FRONT_FACE_CLOCKWISE; state.polygonMode = DecodeFillMode(D3DFILLMODE(rs[D3DRS_FILLMODE])); state.conservativeMode = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT; state.sampleCount = 0; EmitCs([ cState = state ](DxvkContext* ctx) { ctx->setRasterizerState(cState); }); } /* Clears DirtyDepthBias and emits the depth bias: D3DRS_DEPTHBIAS multiplied by m_depthBiasScale as the constant term, D3DRS_SLOPESCALEDEPTHBIAS as the slope term, and a clamp of 0. */ void D3D9DeviceEx::BindDepthBias() { m_flags.clr(D3D9DeviceFlag::DirtyDepthBias); auto& rs = m_state.renderStates; float depthBias = bit::cast(rs[D3DRS_DEPTHBIAS]) * m_depthBiasScale; float slopeScaledDepthBias = bit::cast(rs[D3DRS_SLOPESCALEDEPTHBIAS]); DxvkDepthBias biases; biases.depthBiasConstant = depthBias; biases.depthBiasSlope = slopeScaledDepthBias; biases.depthBiasClamp = 0.0f; EmitCs([ cBiases = biases ](DxvkContext* ctx) { ctx->setDepthBias(cBiases); }); } /* Clears DirtyAlphaTestState and sets the AlphaCompareOp spec constant: the compare op decoded from D3DRS_ALPHAFUNC while IsAlphaTestEnabled() is true, VK_COMPARE_OP_ALWAYS otherwise. */ void D3D9DeviceEx::BindAlphaTestState() { m_flags.clr(D3D9DeviceFlag::DirtyAlphaTestState); auto& rs = m_state.renderStates; VkCompareOp alphaOp = IsAlphaTestEnabled() ? 
DecodeCompareOp(D3DCMPFUNC(rs[D3DRS_ALPHAFUNC])) : VK_COMPARE_OP_ALWAYS; EmitCs([cAlphaOp = alphaOp] (DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::AlphaCompareOp, cAlphaOp); }); } /* Emits the low 8 bits of D3DRS_STENCILREF as the stencil reference. */ void D3D9DeviceEx::BindDepthStencilRefrence() { auto& rs = m_state.renderStates; uint32_t ref = uint32_t(rs[D3DRS_STENCILREF]) & 0xff; EmitCs([cRef = ref] (DxvkContext* ctx) { ctx->setStencilReference(cRef); }); } /* Builds a D3D9SamplerKey from the sampler states of `Sampler` and emits a command that binds the matching sampler object to the remapped shader slot. When m_d3d9Options.samplerAnisotropy is not -1, linear mag and min filters are replaced by anisotropic ones and the option value overrides MaxAnisotropy. The key is normalized before use. The lookup in m_samplers, and the creation and insertion on a miss, all run inside the emitted lambda, which captures `this`. */ void D3D9DeviceEx::BindSampler(DWORD Sampler) { auto& state = m_state.samplerStates[Sampler]; D3D9SamplerKey key; key.AddressU = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSU]); key.AddressV = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSV]); key.AddressW = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSW]); key.MagFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MAGFILTER]); key.MinFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MINFILTER]); key.MipFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MIPFILTER]); key.MaxAnisotropy = state[D3DSAMP_MAXANISOTROPY]; key.MipmapLodBias = bit::cast(state[D3DSAMP_MIPMAPLODBIAS]); key.MaxMipLevel = state[D3DSAMP_MAXMIPLEVEL]; key.BorderColor = D3DCOLOR(state[D3DSAMP_BORDERCOLOR]); /* Depth is taken from this sampler's bit in m_depthTextures. */ key.Depth = m_depthTextures & (1u << Sampler); if (m_d3d9Options.samplerAnisotropy != -1) { if (key.MagFilter == D3DTEXF_LINEAR) key.MagFilter = D3DTEXF_ANISOTROPIC; if (key.MinFilter == D3DTEXF_LINEAR) key.MinFilter = D3DTEXF_ANISOTROPIC; key.MaxAnisotropy = m_d3d9Options.samplerAnisotropy; } NormalizeSamplerKey(key); auto samplerInfo = RemapStateSamplerShader(Sampler); const uint32_t slot = computeResourceSlotId( samplerInfo.first, DxsoBindingType::Image, samplerInfo.second); EmitCs([this, cSlot = slot, cKey = key ] (DxvkContext* ctx) { /* Reuse an existing sampler for this key if one is stored. */ auto pair = m_samplers.find(cKey); if (pair != m_samplers.end()) { ctx->bindResourceSampler(cSlot, pair->second); return; } auto mipFilter = DecodeMipFilter(cKey.MipFilter); DxvkSamplerCreateInfo info; info.addressModeU = DecodeAddressMode(cKey.AddressU); info.addressModeV = DecodeAddressMode(cKey.AddressV); 
info.addressModeW = DecodeAddressMode(cKey.AddressW); /* Samplers whose key has Depth set compare with LESS_OR_EQUAL; all others get VK_COMPARE_OP_NEVER. */ info.compareToDepth = cKey.Depth; info.compareOp = cKey.Depth ? VK_COMPARE_OP_LESS_OR_EQUAL : VK_COMPARE_OP_NEVER; info.magFilter = DecodeFilter(cKey.MagFilter); info.minFilter = DecodeFilter(cKey.MinFilter); info.mipmapMode = mipFilter.MipFilter; info.maxAnisotropy = float(cKey.MaxAnisotropy); info.useAnisotropy = cKey.MaxAnisotropy > 1; info.mipmapLodBias = cKey.MipmapLodBias; /* With mips disabled both LOD bounds are 0; otherwise MaxMipLevel is the lower bound and FLT_MAX the upper bound. */ info.mipmapLodMin = mipFilter.MipsEnabled ? float(cKey.MaxMipLevel) : 0; info.mipmapLodMax = mipFilter.MipsEnabled ? FLT_MAX : 0; info.usePixelCoord = VK_FALSE; DecodeD3DCOLOR(cKey.BorderColor, info.borderColor.float32); /* The alpha override below applies only when the device reports neither customBorderColorWithoutFormat nor customBorderColors and the border RGB is exactly 1.0 in all three channels. */ if (!m_dxvkDevice->features().extCustomBorderColor.customBorderColorWithoutFormat) { // HACK: Let's get OPAQUE_WHITE border color over // TRANSPARENT_BLACK if the border RGB is white. if (info.borderColor.float32[0] == 1.0f && info.borderColor.float32[1] == 1.0f && info.borderColor.float32[2] == 1.0f && !m_dxvkDevice->features().extCustomBorderColor.customBorderColors) { // Then set the alpha to 1. 
info.borderColor.float32[3] = 1.0f; } } /* On success the new sampler is stored in m_samplers under the key, bound to the slot, and m_samplerCount is incremented. On DxvkError the message is logged and nothing is bound by this command. */ try { auto sampler = m_dxvkDevice->createSampler(info); m_samplers.insert(std::make_pair(cKey, sampler)); ctx->bindResourceSampler(cSlot, std::move(sampler)); m_samplerCount++; } catch (const DxvkError& e) { Logger::err(e.message()); } }); } /* Emits a command binding the sample view of the texture at StateSampler to its remapped shader slot; the view is requested with bit 0 of D3DSAMP_SRGBTEXTURE. For samplers below 16 it also stores the texture type (GetType() minus D3DRTYPE_TEXTURE) in the two bits of m_samplerTypeBitfield that belong to that sampler and sets DirtyFFPixelShader when those bits change. commonTex is dereferenced without a null check, so the sampler must have a texture bound. */ void D3D9DeviceEx::BindTexture(DWORD StateSampler) { auto shaderSampler = RemapStateSamplerShader(StateSampler); uint32_t slot = computeResourceSlotId(shaderSampler.first, DxsoBindingType::Image, uint32_t(shaderSampler.second)); const bool srgb = m_state.samplerStates[StateSampler][D3DSAMP_SRGBTEXTURE] & 0x1; D3D9CommonTexture* commonTex = GetCommonTexture(m_state.textures[StateSampler]); // For all our pixel shader textures if (likely(StateSampler < 16)) { const uint32_t offset = StateSampler * 2; const uint32_t textureType = uint32_t(commonTex->GetType() - D3DRTYPE_TEXTURE); const uint32_t textureBitMask = 0b11u << offset; const uint32_t textureBits = textureType << offset; if ((m_samplerTypeBitfield & textureBitMask) != textureBits) { m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); m_samplerTypeBitfield &= ~textureBitMask; m_samplerTypeBitfield |= textureBits; } } EmitCs([ cSlot = slot, cImageView = commonTex->GetSampleView(srgb) ](DxvkContext* ctx) { ctx->bindResourceView(cSlot, cImageView, nullptr); }); } /* Zeroes the 2-bit sampler type fields of the masked samplers below index 16 (setting DirtyFFPixelShader if any of them was non-zero) and emits a command that binds a null view to the shader slot of every sampler in mask. */ void D3D9DeviceEx::UnbindTextures(uint32_t mask) { // For all our pixel shader textures const uint32_t pixelShaderMask = (1u << 16u) - 1; // Update the sampler type bitfield to 0 for the // unbound textures. 
uint32_t typeMask = 0; /* Two type bits per sampler, at bit position i * 2. */ for (uint32_t i : bit::BitMask(mask & pixelShaderMask)) typeMask |= 0b11u << (i * 2u); if ((m_samplerTypeBitfield & typeMask) != 0) { m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); m_samplerTypeBitfield &= ~typeMask; } EmitCs([ cMask = mask ](DxvkContext* ctx) { for (uint32_t i : bit::BitMask(cMask)) { auto shaderSampler = RemapStateSamplerShader(i); uint32_t slot = computeResourceSlotId(shaderSampler.first, DxsoBindingType::Image, uint32_t(shaderSampler.second)); ctx->bindResourceView(slot, nullptr, nullptr); } }); } /* Calls BindSampler for every sampler index set in mask and clears those bits from m_dirtySamplerStates. */ void D3D9DeviceEx::UndirtySamplers(uint32_t mask) { for (uint32_t i : bit::BitMask(mask)) BindSampler(i); m_dirtySamplerStates &= ~mask; } /* Splits usedMask by m_activeTextures: samplers with an active texture are bound via BindTexture, the remaining ones are unbound in one UnbindTextures call; finally usedMask is cleared from m_dirtyTextures. */ void D3D9DeviceEx::UndirtyTextures(uint32_t usedMask) { const uint32_t activeMask = usedMask & m_activeTextures; const uint32_t inactiveMask = usedMask & ~m_activeTextures; for (uint32_t i : bit::BitMask(activeMask)) BindTexture(i); if (inactiveMask) UnbindTextures(inactiveMask); m_dirtyTextures &= ~usedMask; } /* Under the device lock, sets the dirty-texture bit of every active sampler whose bound texture equals `texture`. */ void D3D9DeviceEx::MarkTextureBindingDirty(IDirect3DBaseTexture9* texture) { D3D9DeviceLock lock = LockDevice(); for (uint32_t i : bit::BitMask(m_activeTextures)) { if (m_state.textures[i] == texture) m_dirtyTextures |= 1u << i; } } /* Builds the draw info for a draw call: the vertex count comes from GetVertexCount(PrimitiveType, PrimitiveCount); InstanceCount is used only when m_iaState.streamsInstanced and m_iaState.streamsUsed share at least one bit, otherwise the instance count is 1. */ D3D9DrawInfo D3D9DeviceEx::GenerateDrawInfo( D3DPRIMITIVETYPE PrimitiveType, UINT PrimitiveCount, UINT InstanceCount) { D3D9DrawInfo drawInfo; drawInfo.vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount); drawInfo.instanceCount = m_iaState.streamsInstanced & m_iaState.streamsUsed ? 
InstanceCount : 1u; return drawInfo; } /* Returns the low 23 bits of m_state.streamFreq[0], but never less than 1. */ uint32_t D3D9DeviceEx::GetInstanceCount() const { return std::max(m_state.streamFreq[0] & 0x7FFFFFu, 1u); } /* Brings lazily tracked state up to date before a draw of the given primitive type. In order: render target hazard barrier, depth-stencil hazard change detection, pending vertex buffer uploads, managed texture uploads and mip generation for textures the shaders use, pending index buffer upload, fog, framebuffer, viewport and scissor, dirty samplers and textures, then the remaining dirty pipeline state, shaders, constants and spec constants. */ void D3D9DeviceEx::PrepareDraw(D3DPRIMITIVETYPE PrimitiveType) { if (unlikely(m_activeHazardsRT != 0)) { EmitCs([](DxvkContext* ctx) { ctx->emitRenderTargetReadbackBarrier(); }); if (m_d3d9Options.generalHazards) MarkRenderHazards(); } /* A change between "no depth-stencil hazards" and "some depth-stencil hazards" marks the framebuffer dirty. */ if (unlikely((m_lastHazardsDS == 0) != (m_activeHazardsDS == 0))) { m_flags.set(D3D9DeviceFlag::DirtyFramebuffer); m_lastHazardsDS = m_activeHazardsDS; } for (uint32_t i = 0; i < caps::MaxStreams; i++) { auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer); if (vbo != nullptr && vbo->NeedsUpload()) FlushBuffer(vbo); } /* usedSamplerMask: samplers referenced by either shader mask; usedTextureMask: those that also have an active texture. */ const uint32_t usedSamplerMask = m_psShaderMasks.samplerMask | m_vsShaderMasks.samplerMask; const uint32_t usedTextureMask = m_activeTextures & usedSamplerMask; const uint32_t texturesToUpload = m_activeTexturesToUpload & usedTextureMask; if (unlikely(texturesToUpload != 0)) UploadManagedTextures(texturesToUpload); const uint32_t texturesToGen = m_activeTexturesToGen & usedTextureMask; if (unlikely(texturesToGen != 0)) GenerateTextureMips(texturesToGen); auto* ibo = GetCommonBuffer(m_state.indices); if (ibo != nullptr && ibo->NeedsUpload()) FlushBuffer(ibo); UpdateFog(); if (m_flags.test(D3D9DeviceFlag::DirtyFramebuffer)) BindFramebuffer(); if (m_flags.test(D3D9DeviceFlag::DirtyViewportScissor)) BindViewportAndScissor(); /* Dirty samplers are limited to used samplers with an active texture; dirty textures to used samplers, so inactive ones get unbound. */ const uint32_t activeDirtySamplers = m_dirtySamplerStates & usedTextureMask; if (activeDirtySamplers) UndirtySamplers(activeDirtySamplers); const uint32_t usedDirtyTextures = m_dirtyTextures & usedSamplerMask; if (usedDirtyTextures) UndirtyTextures(usedDirtyTextures); if (m_flags.test(D3D9DeviceFlag::DirtyBlendState)) BindBlendState(); if (m_flags.test(D3D9DeviceFlag::DirtyDepthStencilState)) BindDepthStencilState(); if (m_flags.test(D3D9DeviceFlag::DirtyRasterizerState)) BindRasterizerState(); if (m_flags.test(D3D9DeviceFlag::DirtyDepthBias)) 
BindDepthBias(); if (m_flags.test(D3D9DeviceFlag::DirtyMultiSampleState)) BindMultiSampleState(); if (m_flags.test(D3D9DeviceFlag::DirtyAlphaTestState)) BindAlphaTestState(); if (m_flags.test(D3D9DeviceFlag::DirtyClipPlanes)) UpdateClipPlanes(); /* Point mode is updated for point lists, and once more for other primitive types while the last emitted mode is still non-zero. NOTE(review): both calls read identically here; presumably their template arguments were lost in extraction - confirm against upstream. */ if (PrimitiveType == D3DPT_POINTLIST) UpdatePointMode(); else if (m_lastPointMode != 0) UpdatePointMode(); if (likely(UseProgrammableVS())) { /* Rebinding the vertex shader also marks the input layout dirty. */ if (unlikely(m_flags.test(D3D9DeviceFlag::DirtyProgVertexShader))) { m_flags.set(D3D9DeviceFlag::DirtyInputLayout); BindShader( GetCommonShader(m_state.vertexShader), GetVertexShaderPermutation()); } UploadConstants(); /* The vertex bool spec constant carries the first bool constant word masked by boolConstantMask, or 0 when CanSWVP() is true. */ if (likely(!CanSWVP())) { UpdateBoolSpecConstantVertex( m_state.vsConsts.bConsts[0] & m_consts[DxsoProgramType::VertexShader].meta.boolConstantMask); } else UpdateBoolSpecConstantVertex(0); } else { UpdateBoolSpecConstantVertex(0); UpdateFixedFunctionVS(); } if (m_flags.test(D3D9DeviceFlag::DirtyInputLayout)) BindInputLayout(); /* Each of the three spec constants is only updated when the new value differs from the last tracked one. */ auto UpdateSamplerTypes = [&](uint32_t types, uint32_t projections, uint32_t fetch4) { if (m_lastSamplerTypeBitfield != types) UpdateSamplerSpecConsant(types); if (m_lastProjectionBitfield != projections) UpdateProjectionSpecConstant(projections); if (m_lastFetch4 != fetch4) UpdateFetch4SpecConstant(fetch4); }; if (likely(UseProgrammablePS())) { UploadConstants(); const uint32_t psTextureMask = m_activeTextures & m_psShaderMasks.samplerMask; uint32_t fetch4 = m_fetch4 & psTextureMask; uint32_t projected = m_projectionBitfield & psTextureMask; /* Pixel shaders with major version 2 or higher always pass 0 for the projection bits, and pass the sampler types only when forceSamplerTypeSpecConstants is set; older versions pass both. */ if (GetCommonShader(m_state.pixelShader)->GetInfo().majorVersion() >= 2) UpdateSamplerTypes(m_d3d9Options.forceSamplerTypeSpecConstants ? m_samplerTypeBitfield : 0u, 0u, fetch4); else UpdateSamplerTypes(m_samplerTypeBitfield, projected, fetch4); // For implicit samplers... 
UpdateBoolSpecConstantPixel( m_state.psConsts.bConsts[0] & m_consts[DxsoProgramType::PixelShader].meta.boolConstantMask); } else { /* Without a programmable pixel shader all pixel spec constants are reset to 0 and the fixed-function pixel path is updated. */ UpdateBoolSpecConstantPixel(0); UpdateSamplerTypes(0u, 0u, 0u); UpdateFixedFunctionPS(); } const uint32_t depthTextureMask = m_depthTextures & usedTextureMask; if (depthTextureMask != m_lastSamplerDepthMode) UpdateSamplerDepthModeSpecConstant(depthTextureMask); /* When dirty, the per-stage shared pixel shader data is rewritten into a freshly allocated slice of m_psShared; the invalidate command is emitted before the slice is filled through its mapped pointer. */ if (m_flags.test(D3D9DeviceFlag::DirtySharedPixelShaderData)) { m_flags.clr(D3D9DeviceFlag::DirtySharedPixelShaderData); DxvkBufferSliceHandle slice = m_psShared->allocSlice(); EmitCs([ cBuffer = m_psShared, cSlice = slice ] (DxvkContext* ctx) { ctx->invalidateBuffer(cBuffer, cSlice); }); D3D9SharedPS* data = reinterpret_cast(slice.mapPtr); for (uint32_t i = 0; i < caps::TextureStageCount; i++) { DecodeD3DCOLOR(D3DCOLOR(m_state.textureStages[i][DXVK_TSS_CONSTANT]), data->Stages[i].Constant); // Flip major-ness so we can get away with a nice easy // dot in the shader without complex access data->Stages[i].BumpEnvMat[0][0] = bit::cast(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT00]); data->Stages[i].BumpEnvMat[1][0] = bit::cast(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT01]); data->Stages[i].BumpEnvMat[0][1] = bit::cast(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT10]); data->Stages[i].BumpEnvMat[1][1] = bit::cast(m_state.textureStages[i][DXVK_TSS_BUMPENVMAT11]); data->Stages[i].BumpEnvLScale = bit::cast(m_state.textureStages[i][DXVK_TSS_BUMPENVLSCALE]); data->Stages[i].BumpEnvLOffset = bit::cast(m_state.textureStages[i][DXVK_TSS_BUMPENVLOFFSET]); } } /* Depth bounds are read from the D3DRS_ADAPTIVETESS states: the test is enabled when the X state equals D3D9Format::NVDB, and the Z and W states carry the minimum and maximum bounds. */ if (m_flags.test(D3D9DeviceFlag::DirtyDepthBounds)) { m_flags.clr(D3D9DeviceFlag::DirtyDepthBounds); DxvkDepthBounds db; db.enableDepthBounds = (m_state.renderStates[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB)); db.minDepthBounds = bit::cast(m_state.renderStates[D3DRS_ADAPTIVETESS_Z]); db.maxDepthBounds = bit::cast(m_state.renderStates[D3DRS_ADAPTIVETESS_W]); EmitCs([ cDepthBounds = db ] (DxvkContext* ctx) { 
ctx->setDepthBounds(cDepthBounds); }); } } /* Emits a command binding the requested permutation of pShaderModule to the stage returned by GetShaderStage(ShaderStage). GetShader(Permutation) is called at emit time and its result is captured by the lambda. */ template void D3D9DeviceEx::BindShader( const D3D9CommonShader* pShaderModule, D3D9ShaderPermutation Permutation) { EmitCs([ cShader = pShaderModule->GetShader(Permutation) ] (DxvkContext* ctx) { ctx->bindShader(GetShaderStage(ShaderStage), cShader); }); } /* Clears DirtyInputLayout and emits the vertex input layout. Without a vertex declaration an empty layout is set and streamsUsed is reset. Otherwise the emitted command walks the input signature of the vertex shader (or the fixed-function signature when no programmable vertex shader is used) and matches each input against the declaration's elements. m_iaState is captured by reference and written from inside the emitted lambdas; the stream frequencies, declaration and shader are captured by value. */ void D3D9DeviceEx::BindInputLayout() { m_flags.clr(D3D9DeviceFlag::DirtyInputLayout); if (m_state.vertexDecl == nullptr) { EmitCs([&cIaState = m_iaState] (DxvkContext* ctx) { cIaState.streamsUsed = 0; ctx->setInputLayout(0, nullptr, 0, nullptr); }); } else { /* Snapshot of the stream frequencies for use inside the lambda. */ std::array streamFreq; for (uint32_t i = 0; i < caps::MaxStreams; i++) streamFreq[i] = m_state.streamFreq[i]; Com vertexDecl = m_state.vertexDecl; Com vertexShader; if (UseProgrammableVS()) vertexShader = m_state.vertexShader; EmitCs([ &cIaState = m_iaState, cVertexDecl = std::move(vertexDecl), cVertexShader = std::move(vertexShader), cStreamsInstanced = m_instancedData, cStreamFreq = streamFreq ] (DxvkContext* ctx) { cIaState.streamsInstanced = cStreamsInstanced; cIaState.streamsUsed = 0; const auto& elements = cVertexDecl->GetElements(); std::array attrList; std::array bindList; uint32_t attrMask = 0; uint32_t bindMask = 0; const auto& isgn = cVertexShader != nullptr ? 
GetCommonShader(cVertexShader)->GetIsgn() : GetFixedFunctionIsgn(); for (uint32_t i = 0; i < isgn.elemCount; i++) { const auto& decl = isgn.elems[i]; /* Defaults for a shader input with no matching declaration element: binding NullStreamIdx, format R32G32B32A32_SFLOAT, offset 0. */ DxvkVertexAttribute attrib; attrib.location = i; attrib.binding = NullStreamIdx; attrib.format = VK_FORMAT_R32G32B32A32_SFLOAT; attrib.offset = 0; for (const auto& element : elements) { DxsoSemantic elementSemantic = { static_cast(element.Usage), element.UsageIndex }; /* PositionT elements are matched as Position. */ if (elementSemantic.usage == DxsoUsage::PositionT) elementSemantic.usage = DxsoUsage::Position; /* The first element with a matching semantic supplies binding, format and offset, and marks its stream as used. */ if (elementSemantic == decl.semantic) { attrib.binding = uint32_t(element.Stream); attrib.format = DecodeDecltype(D3DDECLTYPE(element.Type)); attrib.offset = element.Offset; cIaState.streamsUsed |= 1u << attrib.binding; break; } } attrList[i] = attrib; DxvkVertexBinding binding; binding.binding = attrib.binding; /* NOTE(review): the modulo presumably keeps the NullStreamIdx default within the bounds of cStreamFreq - confirm against the value of NullStreamIdx. */ uint32_t instanceData = cStreamFreq[binding.binding % caps::MaxStreams]; /* Streams flagged D3DSTREAMSOURCE_INSTANCEDATA use per-instance input rate with the low 23 bits as fetch rate; all others use per-vertex rate. */ if (instanceData & D3DSTREAMSOURCE_INSTANCEDATA) { binding.fetchRate = instanceData & 0x7FFFFF; // Remove instance packed-in flags in the data. binding.inputRate = VK_VERTEX_INPUT_RATE_INSTANCE; } else { binding.fetchRate = 0; binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; } // Check if the binding was already defined. 
bool bindingDefined = false; for (uint32_t j = 0; j < i; j++) { uint32_t bindingId = attrList.at(j).binding; if (binding.binding == bindingId) { bindingDefined = true; } } if (!bindingDefined) bindList.at(binding.binding) = binding; attrMask |= 1u << i; bindMask |= 1u << binding.binding; } // Compact the attribute and binding lists to filter // out attributes and bindings not used by the shader uint32_t attrCount = CompactSparseList(attrList.data(), attrMask); uint32_t bindCount = CompactSparseList(bindList.data(), bindMask); ctx->setInputLayout( attrCount, attrList.data(), bindCount, bindList.data()); }); } } void D3D9DeviceEx::BindVertexBuffer( UINT Slot, D3D9VertexBuffer* pBuffer, UINT Offset, UINT Stride) { EmitCs([ cSlotId = Slot, cBufferSlice = pBuffer != nullptr ? pBuffer->GetCommonBuffer()->GetBufferSlice(Offset) : DxvkBufferSlice(), cStride = pBuffer != nullptr ? Stride : 0 ] (DxvkContext* ctx) { ctx->bindVertexBuffer(cSlotId, cBufferSlice, cStride); }); } void D3D9DeviceEx::BindIndices() { D3D9CommonBuffer* buffer = GetCommonBuffer(m_state.indices); D3D9Format format = buffer != nullptr ? buffer->Desc()->Format : D3D9Format::INDEX32; const VkIndexType indexType = DecodeIndexType(format); EmitCs([ cBufferSlice = buffer != nullptr ? buffer->GetBufferSlice() : DxvkBufferSlice(), cIndexType = indexType ](DxvkContext* ctx) { ctx->bindIndexBuffer(cBufferSlice, cIndexType); }); } void D3D9DeviceEx::Begin(D3D9Query* pQuery) { D3D9DeviceLock lock = LockDevice(); EmitCs([cQuery = Com(pQuery)](DxvkContext* ctx) { cQuery->Begin(ctx); }); } void D3D9DeviceEx::End(D3D9Query* pQuery) { D3D9DeviceLock lock = LockDevice(); EmitCs([cQuery = Com(pQuery)](DxvkContext* ctx) { cQuery->End(ctx); }); pQuery->NotifyEnd(); if (unlikely(pQuery->IsEvent())) { pQuery->IsStalling() ? 
Flush() : FlushImplicit(TRUE); } else if (pQuery->IsStalling()) { FlushImplicit(FALSE); } } void D3D9DeviceEx::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { m_state.vsConsts.bConsts[idx] &= ~mask; m_state.vsConsts.bConsts[idx] |= bits & mask; m_consts[DxsoProgramTypes::VertexShader].dirty = true; } void D3D9DeviceEx::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) { m_state.psConsts.bConsts[idx] &= ~mask; m_state.psConsts.bConsts[idx] |= bits & mask; m_consts[DxsoProgramTypes::PixelShader].dirty = true; } HRESULT D3D9DeviceEx::CreateShaderModule( D3D9CommonShader* pShaderModule, VkShaderStageFlagBits ShaderStage, const DWORD* pShaderBytecode, const DxsoModuleInfo* pModuleInfo) { try { m_shaderModules->GetShaderModule(this, pShaderModule, ShaderStage, pModuleInfo, pShaderBytecode); return D3D_OK; } catch (const DxvkError& e) { Logger::err(e.message()); return D3DERR_INVALIDCALL; } } template < DxsoProgramType ProgramType, D3D9ConstantType ConstantType, typename T> HRESULT D3D9DeviceEx::SetShaderConstants( UINT StartRegister, const T* pConstantData, UINT Count) { const uint32_t regCountHardware = DetermineHardwareRegCount(); constexpr uint32_t regCountSoftware = DetermineSoftwareRegCount(); if (unlikely(StartRegister + Count > regCountSoftware)) return D3DERR_INVALIDCALL; Count = UINT( std::max( std::clamp(Count + StartRegister, 0, regCountHardware) - INT(StartRegister), 0)); if (unlikely(Count == 0)) return D3D_OK; if (unlikely(pConstantData == nullptr)) return D3DERR_INVALIDCALL; if (unlikely(ShouldRecord())) return m_recorder->SetShaderConstants( StartRegister, pConstantData, Count); if constexpr (ConstantType != D3D9ConstantType::Bool) { uint32_t maxCount = ConstantType == D3D9ConstantType::Float ? 
m_consts[ProgramType].meta.maxConstIndexF : m_consts[ProgramType].meta.maxConstIndexI; m_consts[ProgramType].dirty |= StartRegister < maxCount; } UpdateStateConstants( &m_state, StartRegister, pConstantData, Count, m_d3d9Options.d3d9FloatEmulation); return D3D_OK; } void D3D9DeviceEx::UpdateFixedFunctionVS() { // Shader... bool hasPositionT = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT) : false; bool hasBlendWeight = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasBlendWeight) : false; bool hasBlendIndices = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasBlendIndices) : false; bool indexedVertexBlend = hasBlendIndices && m_state.renderStates[D3DRS_INDEXEDVERTEXBLENDENABLE]; D3D9FF_VertexBlendMode vertexBlendMode = D3D9FF_VertexBlendMode_Disabled; if (m_state.renderStates[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE && !hasPositionT) { vertexBlendMode = m_state.renderStates[D3DRS_VERTEXBLEND] == D3DVBF_TWEENING ? D3D9FF_VertexBlendMode_Tween : D3D9FF_VertexBlendMode_Normal; if (m_state.renderStates[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS) { if (!hasBlendWeight) vertexBlendMode = D3D9FF_VertexBlendMode_Disabled; } else if (!indexedVertexBlend) vertexBlendMode = D3D9FF_VertexBlendMode_Disabled; } if (unlikely(hasPositionT && m_state.vertexShader != nullptr && !m_flags.test(D3D9DeviceFlag::DirtyProgVertexShader))) { m_flags.set(D3D9DeviceFlag::DirtyInputLayout); m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); m_flags.set(D3D9DeviceFlag::DirtyProgVertexShader); } if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexShader)) { m_flags.clr(D3D9DeviceFlag::DirtyFFVertexShader); D3D9FFShaderKeyVS key; key.Data.Contents.HasPositionT = hasPositionT; key.Data.Contents.HasColor0 = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor0) : false; key.Data.Contents.HasColor1 = m_state.vertexDecl != nullptr ? 
m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor1) : false; key.Data.Contents.HasPointSize = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPointSize) : false; key.Data.Contents.HasFog = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasFog) : false; bool lighting = m_state.renderStates[D3DRS_LIGHTING] != 0 && !key.Data.Contents.HasPositionT; bool colorVertex = m_state.renderStates[D3DRS_COLORVERTEX] != 0; uint32_t mask = (lighting && colorVertex) ? (key.Data.Contents.HasColor0 ? D3DMCS_COLOR1 : D3DMCS_MATERIAL) | (key.Data.Contents.HasColor1 ? D3DMCS_COLOR2 : D3DMCS_MATERIAL) : 0; key.Data.Contents.UseLighting = lighting; key.Data.Contents.NormalizeNormals = m_state.renderStates[D3DRS_NORMALIZENORMALS]; key.Data.Contents.LocalViewer = m_state.renderStates[D3DRS_LOCALVIEWER] && lighting; key.Data.Contents.RangeFog = m_state.renderStates[D3DRS_RANGEFOGENABLE]; key.Data.Contents.DiffuseSource = m_state.renderStates[D3DRS_DIFFUSEMATERIALSOURCE] & mask; key.Data.Contents.AmbientSource = m_state.renderStates[D3DRS_AMBIENTMATERIALSOURCE] & mask; key.Data.Contents.SpecularSource = m_state.renderStates[D3DRS_SPECULARMATERIALSOURCE] & mask; key.Data.Contents.EmissiveSource = m_state.renderStates[D3DRS_EMISSIVEMATERIALSOURCE] & mask; uint32_t lightCount = 0; if (key.Data.Contents.UseLighting) { for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) { if (m_state.enabledLightIndices[i] != UINT32_MAX) lightCount++; } } key.Data.Contents.LightCount = lightCount; for (uint32_t i = 0; i < caps::MaxTextureBlendStages; i++) { uint32_t transformFlags = m_state.textureStages[i][DXVK_TSS_TEXTURETRANSFORMFLAGS] & ~(D3DTTFF_PROJECTED); uint32_t index = m_state.textureStages[i][DXVK_TSS_TEXCOORDINDEX]; uint32_t indexFlags = (index & TCIMask) >> TCIOffset; transformFlags &= 0b111; index &= 0b111; key.Data.Contents.TransformFlags |= transformFlags << (i * 3); key.Data.Contents.TexcoordFlags |= indexFlags << (i * 
3); key.Data.Contents.TexcoordIndices |= index << (i * 3); } key.Data.Contents.TexcoordDeclMask = m_state.vertexDecl != nullptr ? m_state.vertexDecl->GetTexcoordMask() : 0; key.Data.Contents.VertexBlendMode = uint32_t(vertexBlendMode); if (vertexBlendMode == D3D9FF_VertexBlendMode_Normal) { key.Data.Contents.VertexBlendIndexed = indexedVertexBlend; key.Data.Contents.VertexBlendCount = m_state.renderStates[D3DRS_VERTEXBLEND] & 0xff; } key.Data.Contents.VertexClipping = IsClipPlaneEnabled(); EmitCs([ this, cKey = key, &cShaders = m_ffModules ](DxvkContext* ctx) { auto shader = cShaders.GetShaderModule(this, cKey); ctx->bindShader(VK_SHADER_STAGE_VERTEX_BIT, shader.GetShader()); }); } if (hasPositionT && (m_flags.test(D3D9DeviceFlag::DirtyFFViewport) || m_ffZTest != IsZTestEnabled())) { m_flags.clr(D3D9DeviceFlag::DirtyFFViewport); m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); const auto& vp = m_state.viewport; // For us to account for the Vulkan viewport rules // when translating Window Coords -> Real Coords: // We need to negate the inverse extent we multiply by, // this follows through to the offset when that gets // timesed by it. // The 1.0f additional offset however does not, // so we account for that there manually. m_ffZTest = IsZTestEnabled(); m_viewportInfo.inverseExtent = Vector4( 2.0f / float(vp.Width), -2.0f / float(vp.Height), m_ffZTest ? 1.0f : 0.0f, 1.0f); m_viewportInfo.inverseOffset = Vector4( -float(vp.X), -float(vp.Y), 0.0f, 0.0f); m_viewportInfo.inverseOffset = m_viewportInfo.inverseOffset * m_viewportInfo.inverseExtent; m_viewportInfo.inverseOffset = m_viewportInfo.inverseOffset + Vector4(-1.0f, 1.0f, 0.0f, 0.0f); } // Constants... 
if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexData)) { m_flags.clr(D3D9DeviceFlag::DirtyFFVertexData); DxvkBufferSliceHandle slice = m_vsFixedFunction->allocSlice(); EmitCs([ cBuffer = m_vsFixedFunction, cSlice = slice ] (DxvkContext* ctx) { ctx->invalidateBuffer(cBuffer, cSlice); }); auto WorldView = m_state.transforms[GetTransformIndex(D3DTS_VIEW)] * m_state.transforms[GetTransformIndex(D3DTS_WORLD)]; auto NormalMatrix = inverse(WorldView); D3D9FixedFunctionVS* data = reinterpret_cast(slice.mapPtr); data->WorldView = WorldView; data->NormalMatrix = NormalMatrix; data->InverseView = transpose(inverse(m_state.transforms[GetTransformIndex(D3DTS_VIEW)])); data->Projection = m_state.transforms[GetTransformIndex(D3DTS_PROJECTION)]; for (uint32_t i = 0; i < data->TexcoordMatrices.size(); i++) data->TexcoordMatrices[i] = m_state.transforms[GetTransformIndex(D3DTS_TEXTURE0) + i]; data->ViewportInfo = m_viewportInfo; DecodeD3DCOLOR(m_state.renderStates[D3DRS_AMBIENT], data->GlobalAmbient.data); uint32_t lightIdx = 0; for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) { auto idx = m_state.enabledLightIndices[i]; if (idx == UINT32_MAX) continue; data->Lights[lightIdx++] = D3D9Light(m_state.lights[idx].value(), m_state.transforms[GetTransformIndex(D3DTS_VIEW)]); } data->Material = m_state.material; data->TweenFactor = bit::cast(m_state.renderStates[D3DRS_TWEENFACTOR]); } if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexBlend) && vertexBlendMode == D3D9FF_VertexBlendMode_Normal) { m_flags.clr(D3D9DeviceFlag::DirtyFFVertexBlend); DxvkBufferSliceHandle slice = m_vsVertexBlend->allocSlice(); EmitCs([ cBuffer = m_vsVertexBlend, cSlice = slice ] (DxvkContext* ctx) { ctx->invalidateBuffer(cBuffer, cSlice); }); auto UploadVertexBlendData = [&](auto data) { for (uint32_t i = 0; i < countof(data->WorldView); i++) data->WorldView[i] = m_state.transforms[GetTransformIndex(D3DTS_VIEW)] * m_state.transforms[GetTransformIndex(D3DTS_WORLDMATRIX(i))]; }; (m_isSWVP && indexedVertexBlend) ? 
UploadVertexBlendData(reinterpret_cast(slice.mapPtr)) : UploadVertexBlendData(reinterpret_cast(slice.mapPtr)); } } void D3D9DeviceEx::UpdateFixedFunctionPS() { // Shader... if (m_flags.test(D3D9DeviceFlag::DirtyFFPixelShader)) { m_flags.clr(D3D9DeviceFlag::DirtyFFPixelShader); // Used args for a given operation. auto ArgsMask = [](DWORD Op) { switch (Op) { case D3DTOP_DISABLE: return 0b000u; // No Args case D3DTOP_SELECTARG1: case D3DTOP_PREMODULATE: return 0b010u; // Arg 1 case D3DTOP_SELECTARG2: return 0b100u; // Arg 2 case D3DTOP_MULTIPLYADD: case D3DTOP_LERP: return 0b111u; // Arg 0, 1, 2 default: return 0b110u; // Arg 1, 2 } }; D3D9FFShaderKeyFS key; uint32_t idx; for (idx = 0; idx < caps::TextureStageCount; idx++) { auto& stage = key.Stages[idx].Contents; auto& data = m_state.textureStages[idx]; // Subsequent stages do not occur if this is true. if (data[DXVK_TSS_COLOROP] == D3DTOP_DISABLE) break; // If the stage is invalid (ie. no texture bound), // this and all subsequent stages get disabled. 
if (m_state.textures[idx] == nullptr) { if (((data[DXVK_TSS_COLORARG0] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[DXVK_TSS_COLOROP]) & (1 << 0u))) || ((data[DXVK_TSS_COLORARG1] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[DXVK_TSS_COLOROP]) & (1 << 1u))) || ((data[DXVK_TSS_COLORARG2] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[DXVK_TSS_COLOROP]) & (1 << 2u)))) break; } stage.ColorOp = data[DXVK_TSS_COLOROP]; stage.AlphaOp = data[DXVK_TSS_ALPHAOP]; stage.ColorArg0 = data[DXVK_TSS_COLORARG0]; stage.ColorArg1 = data[DXVK_TSS_COLORARG1]; stage.ColorArg2 = data[DXVK_TSS_COLORARG2]; stage.AlphaArg0 = data[DXVK_TSS_ALPHAARG0]; stage.AlphaArg1 = data[DXVK_TSS_ALPHAARG1]; stage.AlphaArg2 = data[DXVK_TSS_ALPHAARG2]; const uint32_t samplerOffset = idx * 2; stage.Type = (m_samplerTypeBitfield >> samplerOffset) & 0xffu; stage.ResultIsTemp = data[DXVK_TSS_RESULTARG] == D3DTA_TEMP; uint32_t ttff = data[DXVK_TSS_TEXTURETRANSFORMFLAGS]; uint32_t count = ttff & ~D3DTTFF_PROJECTED; stage.Projected = (ttff & D3DTTFF_PROJECTED) ? 1 : 0; stage.ProjectedCount = (ttff & D3DTTFF_PROJECTED) ? count : 0; } auto& stage0 = key.Stages[0].Contents; if (stage0.ResultIsTemp && stage0.ColorOp != D3DTOP_DISABLE && stage0.AlphaOp == D3DTOP_DISABLE) { stage0.AlphaOp = D3DTOP_SELECTARG1; stage0.AlphaArg1 = D3DTA_DIFFUSE; } stage0.GlobalSpecularEnable = m_state.renderStates[D3DRS_SPECULARENABLE]; stage0.GlobalFlatShade = m_state.renderStates[D3DRS_SHADEMODE] == D3DSHADE_FLAT; // The last stage *always* writes to current. 
if (idx >= 1) key.Stages[idx - 1].Contents.ResultIsTemp = false; EmitCs([ this, cKey = key, &cShaders = m_ffModules ](DxvkContext* ctx) { auto shader = cShaders.GetShaderModule(this, cKey); ctx->bindShader(VK_SHADER_STAGE_FRAGMENT_BIT, shader.GetShader()); }); } // Constants if (m_flags.test(D3D9DeviceFlag::DirtyFFPixelData)) { m_flags.clr(D3D9DeviceFlag::DirtyFFPixelData); DxvkBufferSliceHandle slice = m_psFixedFunction->allocSlice(); EmitCs([ cBuffer = m_psFixedFunction, cSlice = slice ] (DxvkContext* ctx) { ctx->invalidateBuffer(cBuffer, cSlice); }); auto& rs = m_state.renderStates; D3D9FixedFunctionPS* data = reinterpret_cast(slice.mapPtr); DecodeD3DCOLOR((D3DCOLOR)rs[D3DRS_TEXTUREFACTOR], data->textureFactor.data); } } bool D3D9DeviceEx::UseProgrammableVS() { return m_state.vertexShader != nullptr && m_state.vertexDecl != nullptr && !m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT); } bool D3D9DeviceEx::UseProgrammablePS() { return m_state.pixelShader != nullptr; } void D3D9DeviceEx::UpdateBoolSpecConstantVertex(uint32_t value) { if (value == m_lastBoolSpecConstantVertex) return; EmitCs([cBitfield = value](DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexShaderBools, cBitfield); }); m_lastBoolSpecConstantVertex = value; } void D3D9DeviceEx::UpdateBoolSpecConstantPixel(uint32_t value) { if (value == m_lastBoolSpecConstantPixel) return; EmitCs([cBitfield = value](DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelShaderBools, cBitfield); }); m_lastBoolSpecConstantPixel = value; } void D3D9DeviceEx::UpdateSamplerSpecConsant(uint32_t value) { EmitCs([cBitfield = value](DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::SamplerType, cBitfield); }); m_lastSamplerTypeBitfield = value; } void D3D9DeviceEx::UpdateProjectionSpecConstant(uint32_t value) { EmitCs([cBitfield = value](DxvkContext* ctx) { 
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::ProjectionType, cBitfield); }); m_lastProjectionBitfield = value; } void D3D9DeviceEx::UpdateFetch4SpecConstant(uint32_t value) { EmitCs([cBitfield = value](DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::Fetch4, cBitfield); }); m_lastFetch4 = value; } void D3D9DeviceEx::UpdateSamplerDepthModeSpecConstant(uint32_t value) { EmitCs([cBitfield = value](DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::SamplerDepthMode, cBitfield); }); m_lastSamplerDepthMode = value; } void D3D9DeviceEx::ApplyPrimitiveType( DxvkContext* pContext, D3DPRIMITIVETYPE PrimType) { if (m_iaState.primitiveType != PrimType) { m_iaState.primitiveType = PrimType; auto iaState = DecodeInputAssemblyState(PrimType); pContext->setInputAssemblyState(iaState); } } void D3D9DeviceEx::ResolveZ() { D3D9Surface* src = m_state.depthStencil.ptr(); IDirect3DBaseTexture9* dst = m_state.textures[0]; if (unlikely(!src || !dst)) return; D3D9CommonTexture* srcTextureInfo = GetCommonTexture(src); D3D9CommonTexture* dstTextureInfo = GetCommonTexture(dst); const D3D9_COMMON_TEXTURE_DESC* srcDesc = srcTextureInfo->Desc(); const D3D9_COMMON_TEXTURE_DESC* dstDesc = dstTextureInfo->Desc(); VkSampleCountFlagBits dstSampleCount; DecodeMultiSampleType(dstDesc->MultiSample, dstDesc->MultisampleQuality, &dstSampleCount); if (unlikely(dstSampleCount != VK_SAMPLE_COUNT_1_BIT)) { Logger::warn("D3D9DeviceEx::ResolveZ: dstSampleCount != 1. 
Discarding."); return; } const D3D9_VK_FORMAT_MAPPING srcFormatInfo = LookupFormat(srcDesc->Format); const D3D9_VK_FORMAT_MAPPING dstFormatInfo = LookupFormat(dstDesc->Format); auto srcVulkanFormatInfo = imageFormatInfo(srcFormatInfo.FormatColor); auto dstVulkanFormatInfo = imageFormatInfo(dstFormatInfo.FormatColor); const VkImageSubresource dstSubresource = dstTextureInfo->GetSubresourceFromIndex( dstVulkanFormatInfo->aspectMask, 0); const VkImageSubresource srcSubresource = srcTextureInfo->GetSubresourceFromIndex( srcVulkanFormatInfo->aspectMask, src->GetSubresource()); const VkImageSubresourceLayers dstSubresourceLayers = { dstSubresource.aspectMask, dstSubresource.mipLevel, dstSubresource.arrayLayer, 1 }; const VkImageSubresourceLayers srcSubresourceLayers = { srcSubresource.aspectMask, srcSubresource.mipLevel, srcSubresource.arrayLayer, 1 }; VkSampleCountFlagBits srcSampleCount; DecodeMultiSampleType(srcDesc->MultiSample, srcDesc->MultisampleQuality, &srcSampleCount); if (srcSampleCount == VK_SAMPLE_COUNT_1_BIT) { EmitCs([ cDstImage = dstTextureInfo->GetImage(), cSrcImage = srcTextureInfo->GetImage(), cDstLayers = dstSubresourceLayers, cSrcLayers = srcSubresourceLayers ] (DxvkContext* ctx) { ctx->copyImage( cDstImage, cDstLayers, VkOffset3D { 0, 0, 0 }, cSrcImage, cSrcLayers, VkOffset3D { 0, 0, 0 }, cDstImage->mipLevelExtent(cDstLayers.mipLevel)); }); } else { EmitCs([ cDstImage = dstTextureInfo->GetImage(), cSrcImage = srcTextureInfo->GetImage(), cDstSubres = dstSubresourceLayers, cSrcSubres = srcSubresourceLayers ] (DxvkContext* ctx) { // We should resolve using the first sample according to // http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Advanced-DX9-Capabilities-for-ATI-Radeon-Cards_v2.pdf // "The resolve operation copies the depth value from the *first sample only* into the resolved depth stencil texture." 
constexpr auto resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR; VkImageResolve region; region.srcSubresource = cSrcSubres; region.srcOffset = VkOffset3D { 0, 0, 0 }; region.dstSubresource = cDstSubres; region.dstOffset = VkOffset3D { 0, 0, 0 }; region.extent = cDstImage->mipLevelExtent(cDstSubres.mipLevel); ctx->resolveDepthStencilImage(cDstImage, cSrcImage, region, resolveMode, resolveMode); }); } dstTextureInfo->MarkAllWrittenByGPU(); } void D3D9DeviceEx::TransitionImage(D3D9CommonTexture* pResource, VkImageLayout NewLayout) { EmitCs([ cImage = pResource->GetImage(), cNewLayout = NewLayout ] (DxvkContext* ctx) { ctx->changeImageLayout( cImage, cNewLayout); }); } void D3D9DeviceEx::TransformImage( D3D9CommonTexture* pResource, const VkImageSubresourceRange* pSubresources, VkImageLayout OldLayout, VkImageLayout NewLayout) { EmitCs([ cImage = pResource->GetImage(), cSubresources = *pSubresources, cOldLayout = OldLayout, cNewLayout = NewLayout ] (DxvkContext* ctx) { ctx->transformImage( cImage, cSubresources, cOldLayout, cNewLayout); }); } HRESULT D3D9DeviceEx::ResetState(D3DPRESENT_PARAMETERS* pPresentationParameters) { if (!pPresentationParameters->EnableAutoDepthStencil) SetDepthStencilSurface(nullptr); for (uint32_t i = 1; i < caps::MaxSimultaneousRenderTargets; i++) SetRenderTarget(i, nullptr); auto& rs = m_state.renderStates; rs[D3DRS_SEPARATEALPHABLENDENABLE] = FALSE; rs[D3DRS_ALPHABLENDENABLE] = FALSE; rs[D3DRS_BLENDOP] = D3DBLENDOP_ADD; rs[D3DRS_BLENDOPALPHA] = D3DBLENDOP_ADD; rs[D3DRS_DESTBLEND] = D3DBLEND_ZERO; rs[D3DRS_DESTBLENDALPHA] = D3DBLEND_ZERO; rs[D3DRS_COLORWRITEENABLE] = 0x0000000f; rs[D3DRS_COLORWRITEENABLE1] = 0x0000000f; rs[D3DRS_COLORWRITEENABLE2] = 0x0000000f; rs[D3DRS_COLORWRITEENABLE3] = 0x0000000f; rs[D3DRS_SRCBLEND] = D3DBLEND_ONE; rs[D3DRS_SRCBLENDALPHA] = D3DBLEND_ONE; BindBlendState(); rs[D3DRS_BLENDFACTOR] = 0xffffffff; BindBlendFactor(); rs[D3DRS_ZENABLE] = pPresentationParameters->EnableAutoDepthStencil ? 
D3DZB_TRUE : D3DZB_FALSE; rs[D3DRS_ZFUNC] = D3DCMP_LESSEQUAL; rs[D3DRS_TWOSIDEDSTENCILMODE] = FALSE; rs[D3DRS_ZWRITEENABLE] = TRUE; rs[D3DRS_STENCILENABLE] = FALSE; rs[D3DRS_STENCILFAIL] = D3DSTENCILOP_KEEP; rs[D3DRS_STENCILZFAIL] = D3DSTENCILOP_KEEP; rs[D3DRS_STENCILPASS] = D3DSTENCILOP_KEEP; rs[D3DRS_STENCILFUNC] = D3DCMP_ALWAYS; rs[D3DRS_CCW_STENCILFAIL] = D3DSTENCILOP_KEEP; rs[D3DRS_CCW_STENCILZFAIL] = D3DSTENCILOP_KEEP; rs[D3DRS_CCW_STENCILPASS] = D3DSTENCILOP_KEEP; rs[D3DRS_CCW_STENCILFUNC] = D3DCMP_ALWAYS; rs[D3DRS_STENCILMASK] = 0xFFFFFFFF; rs[D3DRS_STENCILWRITEMASK] = 0xFFFFFFFF; BindDepthStencilState(); rs[D3DRS_STENCILREF] = 0; BindDepthStencilRefrence(); rs[D3DRS_FILLMODE] = D3DFILL_SOLID; rs[D3DRS_CULLMODE] = D3DCULL_CCW; rs[D3DRS_DEPTHBIAS] = bit::cast(0.0f); rs[D3DRS_SLOPESCALEDEPTHBIAS] = bit::cast(0.0f); BindRasterizerState(); BindDepthBias(); rs[D3DRS_SCISSORTESTENABLE] = FALSE; rs[D3DRS_ALPHATESTENABLE] = FALSE; rs[D3DRS_ALPHAFUNC] = D3DCMP_ALWAYS; BindAlphaTestState(); rs[D3DRS_ALPHAREF] = 0; UpdatePushConstant(); rs[D3DRS_MULTISAMPLEMASK] = 0xffffffff; BindMultiSampleState(); rs[D3DRS_TEXTUREFACTOR] = 0xffffffff; m_flags.set(D3D9DeviceFlag::DirtyFFPixelData); rs[D3DRS_DIFFUSEMATERIALSOURCE] = D3DMCS_COLOR1; rs[D3DRS_SPECULARMATERIALSOURCE] = D3DMCS_COLOR2; rs[D3DRS_AMBIENTMATERIALSOURCE] = D3DMCS_MATERIAL; rs[D3DRS_EMISSIVEMATERIALSOURCE] = D3DMCS_MATERIAL; rs[D3DRS_LIGHTING] = TRUE; rs[D3DRS_COLORVERTEX] = TRUE; rs[D3DRS_LOCALVIEWER] = TRUE; rs[D3DRS_RANGEFOGENABLE] = FALSE; rs[D3DRS_NORMALIZENORMALS] = FALSE; m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader); // PS rs[D3DRS_SPECULARENABLE] = FALSE; rs[D3DRS_AMBIENT] = 0; m_flags.set(D3D9DeviceFlag::DirtyFFVertexData); rs[D3DRS_FOGENABLE] = FALSE; rs[D3DRS_FOGCOLOR] = 0; rs[D3DRS_FOGTABLEMODE] = D3DFOG_NONE; rs[D3DRS_FOGSTART] = bit::cast(0.0f); rs[D3DRS_FOGEND] = bit::cast(1.0f); rs[D3DRS_FOGDENSITY] = bit::cast(1.0f); rs[D3DRS_FOGVERTEXMODE] = D3DFOG_NONE; 
m_flags.set(D3D9DeviceFlag::DirtyFogColor); m_flags.set(D3D9DeviceFlag::DirtyFogDensity); m_flags.set(D3D9DeviceFlag::DirtyFogEnd); m_flags.set(D3D9DeviceFlag::DirtyFogScale); m_flags.set(D3D9DeviceFlag::DirtyFogState); rs[D3DRS_CLIPPLANEENABLE] = 0; m_flags.set(D3D9DeviceFlag::DirtyClipPlanes); rs[D3DRS_POINTSPRITEENABLE] = FALSE; rs[D3DRS_POINTSCALEENABLE] = FALSE; rs[D3DRS_POINTSCALE_A] = bit::cast(1.0f); rs[D3DRS_POINTSCALE_B] = bit::cast(0.0f); rs[D3DRS_POINTSCALE_C] = bit::cast(0.0f); rs[D3DRS_POINTSIZE] = bit::cast(1.0f); rs[D3DRS_POINTSIZE_MIN] = bit::cast(1.0f); rs[D3DRS_POINTSIZE_MAX] = bit::cast(64.0f); UpdatePushConstant(); UpdatePushConstant(); UpdatePushConstant(); m_flags.set(D3D9DeviceFlag::DirtyPointScale); UpdatePointMode(); rs[D3DRS_SRGBWRITEENABLE] = 0; rs[D3DRS_SHADEMODE] = D3DSHADE_GOURAUD; rs[D3DRS_VERTEXBLEND] = D3DVBF_DISABLE; rs[D3DRS_INDEXEDVERTEXBLENDENABLE] = FALSE; rs[D3DRS_TWEENFACTOR] = bit::cast(0.0f); m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend); // Render States not implemented beyond this point. 
rs[D3DRS_LASTPIXEL] = TRUE; rs[D3DRS_DITHERENABLE] = FALSE; rs[D3DRS_WRAP0] = 0; rs[D3DRS_WRAP1] = 0; rs[D3DRS_WRAP2] = 0; rs[D3DRS_WRAP3] = 0; rs[D3DRS_WRAP4] = 0; rs[D3DRS_WRAP5] = 0; rs[D3DRS_WRAP6] = 0; rs[D3DRS_WRAP7] = 0; rs[D3DRS_CLIPPING] = TRUE; rs[D3DRS_MULTISAMPLEANTIALIAS] = TRUE; rs[D3DRS_PATCHEDGESTYLE] = D3DPATCHEDGE_DISCRETE; rs[D3DRS_DEBUGMONITORTOKEN] = D3DDMT_ENABLE; rs[D3DRS_POSITIONDEGREE] = D3DDEGREE_CUBIC; rs[D3DRS_NORMALDEGREE] = D3DDEGREE_LINEAR; rs[D3DRS_ANTIALIASEDLINEENABLE] = FALSE; rs[D3DRS_MINTESSELLATIONLEVEL] = bit::cast(1.0f); rs[D3DRS_MAXTESSELLATIONLEVEL] = bit::cast(1.0f); rs[D3DRS_ADAPTIVETESS_X] = bit::cast(0.0f); rs[D3DRS_ADAPTIVETESS_Y] = bit::cast(0.0f); rs[D3DRS_ADAPTIVETESS_Z] = bit::cast(1.0f); rs[D3DRS_ADAPTIVETESS_W] = bit::cast(0.0f); rs[D3DRS_ENABLEADAPTIVETESSELLATION] = FALSE; rs[D3DRS_WRAP8] = 0; rs[D3DRS_WRAP9] = 0; rs[D3DRS_WRAP10] = 0; rs[D3DRS_WRAP11] = 0; rs[D3DRS_WRAP12] = 0; rs[D3DRS_WRAP13] = 0; rs[D3DRS_WRAP14] = 0; rs[D3DRS_WRAP15] = 0; // End Unimplemented Render States for (uint32_t i = 0; i < caps::TextureStageCount; i++) { auto& stage = m_state.textureStages[i]; stage[DXVK_TSS_COLOROP] = i == 0 ? D3DTOP_MODULATE : D3DTOP_DISABLE; stage[DXVK_TSS_COLORARG1] = D3DTA_TEXTURE; stage[DXVK_TSS_COLORARG2] = D3DTA_CURRENT; stage[DXVK_TSS_ALPHAOP] = i == 0 ? 
D3DTOP_SELECTARG1 : D3DTOP_DISABLE; stage[DXVK_TSS_ALPHAARG1] = D3DTA_TEXTURE; stage[DXVK_TSS_ALPHAARG2] = D3DTA_CURRENT; stage[DXVK_TSS_BUMPENVMAT00] = bit::cast(0.0f); stage[DXVK_TSS_BUMPENVMAT01] = bit::cast(0.0f); stage[DXVK_TSS_BUMPENVMAT10] = bit::cast(0.0f); stage[DXVK_TSS_BUMPENVMAT11] = bit::cast(0.0f); stage[DXVK_TSS_TEXCOORDINDEX] = i; stage[DXVK_TSS_BUMPENVLSCALE] = bit::cast(0.0f); stage[DXVK_TSS_BUMPENVLOFFSET] = bit::cast(0.0f); stage[DXVK_TSS_TEXTURETRANSFORMFLAGS] = D3DTTFF_DISABLE; stage[DXVK_TSS_COLORARG0] = D3DTA_CURRENT; stage[DXVK_TSS_ALPHAARG0] = D3DTA_CURRENT; stage[DXVK_TSS_RESULTARG] = D3DTA_CURRENT; stage[DXVK_TSS_CONSTANT] = 0x00000000; } m_flags.set(D3D9DeviceFlag::DirtySharedPixelShaderData); m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); for (uint32_t i = 0; i < caps::MaxStreams; i++) m_state.streamFreq[i] = 1; for (uint32_t i = 0; i < m_state.textures.size(); i++) TextureChangePrivate(m_state.textures[i], nullptr); EmitCs([ cSize = m_state.textures.size() ](DxvkContext* ctx) { for (uint32_t i = 0; i < cSize; i++) { auto samplerInfo = RemapStateSamplerShader(DWORD(i)); uint32_t slot = computeResourceSlotId(samplerInfo.first, DxsoBindingType::Image, uint32_t(samplerInfo.second)); ctx->bindResourceView(slot, nullptr, nullptr); } }); m_dirtyTextures = 0; m_depthTextures = 0; auto& ss = m_state.samplerStates; for (uint32_t i = 0; i < ss.size(); i++) { auto& state = ss[i]; state[D3DSAMP_ADDRESSU] = D3DTADDRESS_WRAP; state[D3DSAMP_ADDRESSV] = D3DTADDRESS_WRAP; state[D3DSAMP_ADDRESSW] = D3DTADDRESS_WRAP; state[D3DSAMP_BORDERCOLOR] = 0x00000000; state[D3DSAMP_MAGFILTER] = D3DTEXF_POINT; state[D3DSAMP_MINFILTER] = D3DTEXF_POINT; state[D3DSAMP_MIPFILTER] = D3DTEXF_NONE; state[D3DSAMP_MIPMAPLODBIAS] = bit::cast(0.0f); state[D3DSAMP_MAXMIPLEVEL] = 0; state[D3DSAMP_MAXANISOTROPY] = 1; state[D3DSAMP_SRGBTEXTURE] = 0; state[D3DSAMP_ELEMENTINDEX] = 0; state[D3DSAMP_DMAPOFFSET] = 0; BindSampler(i); } m_dirtySamplerStates = 0; for (uint32_t i = 0; i 
< caps::MaxClipPlanes; i++) { float plane[4] = { 0, 0, 0, 0 }; SetClipPlane(i, plane); } // We should do this... m_flags.set(D3D9DeviceFlag::DirtyInputLayout); UpdateSamplerSpecConsant(0u); UpdateBoolSpecConstantVertex(0u); UpdateBoolSpecConstantPixel(0u); UpdateSamplerDepthModeSpecConstant(0u); return D3D_OK; } HRESULT D3D9DeviceEx::ResetSwapChain(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) { D3D9Format backBufferFmt = EnumerateFormat(pPresentationParameters->BackBufferFormat); Logger::info(str::format( "D3D9DeviceEx::ResetSwapChain:\n", " Requested Presentation Parameters\n", " - Width: ", pPresentationParameters->BackBufferWidth, "\n", " - Height: ", pPresentationParameters->BackBufferHeight, "\n", " - Format: ", backBufferFmt, "\n" " - Auto Depth Stencil: ", pPresentationParameters->EnableAutoDepthStencil ? "true" : "false", "\n", " ^ Format: ", EnumerateFormat(pPresentationParameters->AutoDepthStencilFormat), "\n", " - Windowed: ", pPresentationParameters->Windowed ? 
"true" : "false", "\n")); if (backBufferFmt != D3D9Format::Unknown) { if (!IsSupportedBackBufferFormat(backBufferFmt)) { Logger::err(str::format("D3D9DeviceEx::ResetSwapChain: Unsupported backbuffer format: ", EnumerateFormat(pPresentationParameters->BackBufferFormat))); return D3DERR_INVALIDCALL; } } if (m_implicitSwapchain != nullptr) { if (FAILED(m_implicitSwapchain->Reset(pPresentationParameters, pFullscreenDisplayMode))) return D3DERR_INVALIDCALL; } else m_implicitSwapchain = new D3D9SwapChainEx(this, pPresentationParameters, pFullscreenDisplayMode); if (pPresentationParameters->EnableAutoDepthStencil) { D3D9_COMMON_TEXTURE_DESC desc; desc.Width = pPresentationParameters->BackBufferWidth; desc.Height = pPresentationParameters->BackBufferHeight; desc.Depth = 1; desc.ArraySize = 1; desc.MipLevels = 1; desc.Usage = D3DUSAGE_DEPTHSTENCIL; desc.Format = EnumerateFormat(pPresentationParameters->AutoDepthStencilFormat); desc.Pool = D3DPOOL_DEFAULT; desc.Discard = FALSE; desc.MultiSample = pPresentationParameters->MultiSampleType; desc.MultisampleQuality = pPresentationParameters->MultiSampleQuality; desc.IsBackBuffer = FALSE; desc.IsAttachmentOnly = TRUE; if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc))) return D3DERR_NOTAVAILABLE; m_autoDepthStencil = new D3D9Surface(this, &desc, nullptr); m_initializer->InitTexture(m_autoDepthStencil->GetCommonTexture()); SetDepthStencilSurface(m_autoDepthStencil.ptr()); } SetRenderTarget(0, m_implicitSwapchain->GetBackBuffer(0)); // Force this if we end up binding the same RT to make scissor change go into effect. BindViewportAndScissor(); return D3D_OK; } HRESULT D3D9DeviceEx::InitialReset(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) { HRESULT hr = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode); if (FAILED(hr)) return hr; hr = ResetState(pPresentationParameters); if (FAILED(hr)) return hr; Flush(); SynchronizeCsThread(); return D3D_OK; } }