1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2024-12-14 09:23:53 +01:00
dxvk/src/d3d9/d3d9_device.cpp
Philip Rebohle 905d69e77b [d3d9] Don't pass format mapping to D3D9CommonTexture
Instead, infer it from the format. This is basically being done
already, however the mapping we pass in is not correct if the
image format is Unknown.
2020-01-17 17:47:08 +01:00

6554 lines
209 KiB
C++

#include "d3d9_device.h"
#include "d3d9_interface.h"
#include "d3d9_swapchain.h"
#include "d3d9_caps.h"
#include "d3d9_util.h"
#include "d3d9_texture.h"
#include "d3d9_buffer.h"
#include "d3d9_vertex_declaration.h"
#include "d3d9_shader.h"
#include "d3d9_query.h"
#include "d3d9_stateblock.h"
#include "d3d9_monitor.h"
#include "d3d9_spec_constants.h"
#include "d3d9_names.h"
#include "d3d9_format_helpers.h"
#include "../dxvk/dxvk_adapter.h"
#include "../dxvk/dxvk_instance.h"
#include "../util/util_bit.h"
#include "../util/util_math.h"
#include "d3d9_initializer.h"
#include <algorithm>
#include <cfloat>
// MSVC predefines _MSC_VER (with a leading underscore). The guard used to
// test MSC_VER, which no compiler defines, so the pragma was never emitted.
#ifdef _MSC_VER
#pragma fenv_access (on)
#endif
namespace dxvk {
// Constructs the D3D9 device on top of an existing DXVK device.
//
// pParent:       interface object that created this device
// pAdapter:      adapter the device belongs to
// DeviceType:    device type requested by the application
// hFocusWindow:  focus window handle, stored in m_window
// BehaviorFlags: D3DCREATE_* flags; the multithreaded, software vertex
//                processing and FPU-preserve bits are inspected here
// dxvkDevice:    backing DXVK device
//
// Besides storing the creation parameters, this initializes m_csThread
// with a new DXVK context, chooses the constant layouts, creates the
// resource initializer and format helper, queues the first CS command,
// creates the constant buffers and records the initial texture memory.
D3D9DeviceEx::D3D9DeviceEx(
D3D9InterfaceEx* pParent,
D3D9Adapter* pAdapter,
D3DDEVTYPE DeviceType,
HWND hFocusWindow,
DWORD BehaviorFlags,
Rc<DxvkDevice> dxvkDevice)
: m_adapter ( pAdapter )
, m_dxvkDevice ( dxvkDevice )
, m_csThread ( dxvkDevice->createContext() )
, m_csChunk ( AllocCsChunk() )
, m_parent ( pParent )
, m_deviceType ( DeviceType )
, m_window ( hFocusWindow )
, m_behaviorFlags ( BehaviorFlags )
, m_multithread ( BehaviorFlags & D3DCREATE_MULTITHREADED )
, m_shaderModules ( new D3D9ShaderModuleSet )
, m_d3d9Options ( dxvkDevice, pParent->GetInstance()->config() )
, m_dxsoOptions ( m_dxvkDevice, m_d3d9Options )
, m_isSWVP ( (BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) ? TRUE : FALSE ) {
// If we can SWVP, then we use an extended constant set
// as SWVP has many more slots available than HWVP.
bool canSWVP = CanSWVP();
DetermineConstantLayouts(canSWVP);
if (canSWVP)
Logger::info("D3D9DeviceEx: Using extended constant set for software vertex processing.");
m_initializer = new D3D9Initializer(m_dxvkDevice);
m_converter = new D3D9FormatHelper(m_dxvkDevice);
// First CS command: begin recording into a freshly created command
// list and set a logic-op state with logic ops disabled.
EmitCs([
cDevice = m_dxvkDevice
] (DxvkContext* ctx) {
ctx->beginRecording(cDevice->createCommandList());
DxvkLogicOpState loState;
loState.enableLogicOp = VK_FALSE;
loState.logicOp = VK_LOGIC_OP_CLEAR;
ctx->setLogicOpState(loState);
});
CreateConstantBuffers();
// SetupFPU() is skipped when the application asked for
// D3DCREATE_FPU_PRESERVE.
if (!(BehaviorFlags & D3DCREATE_FPU_PRESERVE))
SetupFPU();
m_availableMemory = DetermineInitialTextureMemory();
}
// Tears the device down: flushes and synchronizes with the CS thread
// first, then deletes the initializer and format helper created in the
// constructor, and finally waits for the DXVK device to go idle.
D3D9DeviceEx::~D3D9DeviceEx() {
Flush();
SynchronizeCsThread();
delete m_initializer;
delete m_converter;
m_dxvkDevice->waitForIdle(); // Sync Device
}
// COM interface query.
//
// Hands out a new reference to this object for IUnknown and
// IDirect3DDevice9, and for IDirect3DDevice9Ex only when the parent
// interface is extended. Returns E_POINTER for a null output pointer and
// E_NOINTERFACE for everything else.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::QueryInterface(REFIID riid, void** ppvObject) {
  if (ppvObject == nullptr)
    return E_POINTER;

  *ppvObject = nullptr;

  const bool wantsDeviceEx  = riid == __uuidof(IDirect3DDevice9Ex);
  const bool exposeDeviceEx = m_parent->IsExtended() && wantsDeviceEx;

  const bool supported = riid == __uuidof(IUnknown)
                      || riid == __uuidof(IDirect3DDevice9)
                      || exposeDeviceEx;

  if (supported) {
    *ppvObject = ref(this);
    return S_OK;
  }

  // A query for the extended device on a non-extended parent is expected,
  // so it is rejected without logging. Anything else is worth a warning.
  if (!wantsDeviceEx) {
    Logger::warn("D3D9DeviceEx::QueryInterface: Unknown interface query");
    Logger::warn(str::format(riid));
  }

  return E_NOINTERFACE;
}
// Reports the cooperative-level status of the device.
// This implementation unconditionally returns D3D_OK.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::TestCooperativeLevel() {
// Equivalent of D3D11/DXGI present tests. We can always present.
return D3D_OK;
}
// Returns an estimate of the available texture memory, in bytes.
//
// The value is m_availableMemory clamped to the range representable by
// UINT and then rounded down to whole megabytes.
UINT STDMETHODCALLTYPE D3D9DeviceEx::GetAvailableTextureMem() {
  // This is not meant to be accurate.
  // The values are also wildly incorrect in d3d9... But some games rely
  // on this inaccurate value...

  // Clamp to megabyte range, as per spec.
  constexpr UINT range = 0xfff00000;

  // Largest value the return type can hold. Anything above it must be
  // clamped before narrowing, otherwise the conversion wraps around and
  // a large amount of memory would be reported as a small one.
  constexpr int64_t maxReportable = int64_t(~UINT(0));

  // Can't have negative memory!
  int64_t memory = std::max<int64_t>(m_availableMemory.load(), 0);
  memory = std::min<int64_t>(memory, maxReportable);

  return UINT(memory) & range;
}
// No-op in this implementation: nothing is evicted and D3D_OK is
// returned unconditionally.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EvictManagedResources() {
return D3D_OK;
}
// Returns a new reference to the parent interface in *ppD3D9.
// Fails with D3DERR_INVALIDCALL when ppD3D9 is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDirect3D(IDirect3D9** ppD3D9) {
  if (ppD3D9 != nullptr) {
    *ppD3D9 = m_parent.ref();
    return D3D_OK;
  }

  return D3DERR_INVALIDCALL;
}
// Forwards the caps query to the adapter, using the device type this
// device was created with.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDeviceCaps(D3DCAPS9* pCaps) {
  const HRESULT result = m_adapter->GetDeviceCaps(m_deviceType, pCaps);
  return result;
}
// Queries the display mode of swapchain iSwapChain.
// Returns D3DERR_INVALIDCALL when no swapchain exists for that index.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDisplayMode(UINT iSwapChain, D3DDISPLAYMODE* pMode) {
  D3D9DeviceLock lock = LockDevice();

  auto* swapchain = GetInternalSwapchain(iSwapChain);

  if (swapchain == nullptr)
    return D3DERR_INVALIDCALL;

  return swapchain->GetDisplayMode(pMode);
}
// Writes the parameters this device was created with to *pParameters.
// Fails with D3DERR_INVALIDCALL when pParameters is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetCreationParameters(D3DDEVICE_CREATION_PARAMETERS *pParameters) {
  if (pParameters != nullptr) {
    pParameters->hFocusWindow   = m_window;
    pParameters->DeviceType     = m_deviceType;
    pParameters->BehaviorFlags  = m_behaviorFlags;
    pParameters->AdapterOrdinal = m_adapter->GetOrdinal();
    return D3D_OK;
  }

  return D3DERR_INVALIDCALL;
}
// Sets the cursor image and hot spot from an A8R8G8B8 surface.
//
// XHotSpot / YHotSpot: hot spot, passed on to the hardware cursor
// pCursorBitmap:       surface holding the cursor image; must be non-null
//                      and use D3D9Format::A8R8G8B8
//
// Returns D3DERR_INVALIDCALL for a null surface or a wrong format, the
// LockImage error if the surface cannot be locked, and otherwise the
// result of SetHardwareCursor. The software cursor path is not
// implemented: it only logs a warning and returns D3D_OK.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetCursorProperties(
        UINT               XHotSpot,
        UINT               YHotSpot,
        IDirect3DSurface9* pCursorBitmap) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(pCursorBitmap == nullptr))
    return D3DERR_INVALIDCALL;

  auto* cursorTex = GetCommonTexture(pCursorBitmap);
  if (unlikely(cursorTex->Desc()->Format != D3D9Format::A8R8G8B8))
    return D3DERR_INVALIDCALL;

  uint32_t inputWidth  = cursorTex->Desc()->Width;
  uint32_t inputHeight = cursorTex->Desc()->Height;

  // Always use a hardware cursor when windowed.
  bool hwCursor  = m_presentParams.Windowed;

  // Always use a hardware cursor w/h <= 32 px
  hwCursor |= inputWidth  <= HardwareCursorWidth
           || inputHeight <= HardwareCursorHeight;

  if (hwCursor) {
    D3DLOCKED_BOX lockedBox;

    HRESULT hr = LockImage(cursorTex, 0, 0, &lockedBox, nullptr, D3DLOCK_READONLY);
    if (FAILED(hr))
      return hr;

    const uint8_t* data = reinterpret_cast<const uint8_t*>(lockedBox.pBits);

    // Windows works with a stride of 128, lets respect that.
    // Copy data to the bitmap...
    CursorBitmap bitmap = { 0 };

    // Bytes to copy per row: one source row, capped at the bitmap pitch.
    // Sizing this from width * height would read past the end of each
    // source row for cursors narrower than the hardware cursor.
    const size_t rowBytes = std::min<size_t>(
      HardwareCursorPitch,
      size_t(inputWidth) * HardwareCursorFormatSize);

    // Only copy rows that exist in the source surface. Rows beyond the
    // source height stay zero-initialized in the bitmap.
    const uint32_t rowCount = std::min<uint32_t>(HardwareCursorHeight, inputHeight);

    for (uint32_t h = 0; h < rowCount; h++)
      std::memcpy(&bitmap[h * HardwareCursorPitch], &data[h * lockedBox.RowPitch], rowBytes);

    UnlockImage(cursorTex, 0, 0);

    // Set this as our cursor.
    return m_cursor.SetHardwareCursor(XHotSpot, YHotSpot, bitmap);
  }

  // Software Cursor...
  Logger::warn("D3D9DeviceEx::SetCursorProperties: Software cursor not implemented.");

  return D3D_OK;
}
// Moves the cursor to (X, Y) while holding the device lock.
// The Flags argument is not read by this implementation.
void STDMETHODCALLTYPE D3D9DeviceEx::SetCursorPosition(int X, int Y, DWORD Flags) {
D3D9DeviceLock lock = LockDevice();
// I was not able to find an instance
// where the cursor update was not immediate.
// Fullscreen + Windowed seem to have the same
// behaviour here.
// Hence we ignore the flag D3DCURSOR_IMMEDIATE_UPDATE.
m_cursor.UpdateCursor(X, Y);
}
// Forwards the show/hide request to the cursor object under the device
// lock and returns whatever the cursor object reports.
BOOL STDMETHODCALLTYPE D3D9DeviceEx::ShowCursor(BOOL bShow) {
  D3D9DeviceLock lock = LockDevice();

  const BOOL result = m_cursor.ShowCursor(bShow);
  return result;
}
// Non-Ex entry point: delegates to CreateAdditionalSwapChainEx, passing
// nullptr for its second argument.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateAdditionalSwapChain(
        D3DPRESENT_PARAMETERS* pPresentationParameters,
        IDirect3DSwapChain9**  ppSwapChain) {
  const HRESULT hr = CreateAdditionalSwapChainEx(
    pPresentationParameters,
    nullptr,
    ppSwapChain);

  return hr;
}
// Returns a new reference to swapchain iSwapChain in *pSwapChain.
// Fails with D3DERR_INVALIDCALL when the index has no swapchain or the
// output pointer is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetSwapChain(UINT iSwapChain, IDirect3DSwapChain9** pSwapChain) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(pSwapChain);

  auto* swapchain = GetInternalSwapchain(iSwapChain);

  const bool invalid = swapchain == nullptr
                    || pSwapChain == nullptr;

  if (unlikely(invalid))
    return D3DERR_INVALIDCALL;

  IDirect3DSwapChain9* iface = static_cast<IDirect3DSwapChain9*>(ref(swapchain));
  *pSwapChain = iface;

  return D3D_OK;
}
// Number of entries currently held in m_swapchains.
UINT STDMETHODCALLTYPE D3D9DeviceEx::GetNumberOfSwapChains() {
  D3D9DeviceLock lock = LockDevice();

  const auto count = m_swapchains.size();
  return static_cast<UINT>(count);
}
// Resets the swapchain and then the device state with the given present
// parameters. On success the device is flushed and synchronized with the
// CS thread. The first failing step's HRESULT is returned as-is.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Reset(D3DPRESENT_PARAMETERS* pPresentationParameters) {
  D3D9DeviceLock lock = LockDevice();

  HRESULT hr = ResetSwapChain(pPresentationParameters, nullptr);

  // Only touch device state once the swapchain reset went through.
  if (!FAILED(hr))
    hr = ResetState(pPresentationParameters);

  if (FAILED(hr))
    return hr;

  Flush();
  SynchronizeCsThread();

  return D3D_OK;
}
// Non-Ex entry point: delegates to PresentEx with the flags argument
// set to zero.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Present(
  const RECT*    pSourceRect,
  const RECT*    pDestRect,
        HWND     hDestWindowOverride,
  const RGNDATA* pDirtyRegion) {
  const HRESULT hr = PresentEx(
    pSourceRect, pDestRect,
    hDestWindowOverride,
    pDirtyRegion, 0);

  return hr;
}
// Fetches a back buffer of swapchain iSwapChain.
// *ppBackBuffer is cleared up front; D3DERR_INVALIDCALL is returned when
// no swapchain exists for the index.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetBackBuffer(
        UINT                iSwapChain,
        UINT                iBackBuffer,
        D3DBACKBUFFER_TYPE  Type,
        IDirect3DSurface9** ppBackBuffer) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(ppBackBuffer);

  auto* swapchain = GetInternalSwapchain(iSwapChain);

  if (swapchain == nullptr)
    return D3DERR_INVALIDCALL;

  return swapchain->GetBackBuffer(iBackBuffer, Type, ppBackBuffer);
}
// Queries the raster status of swapchain iSwapChain.
// Returns D3DERR_INVALIDCALL when no swapchain exists for that index.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRasterStatus(UINT iSwapChain, D3DRASTER_STATUS* pRasterStatus) {
  D3D9DeviceLock lock = LockDevice();

  auto* swapchain = GetInternalSwapchain(iSwapChain);

  if (swapchain == nullptr)
    return D3DERR_INVALIDCALL;

  return swapchain->GetRasterStatus(pRasterStatus);
}
// Forwards the dialog-box mode to swapchain 0.
// Returns D3DERR_INVALIDCALL when swapchain 0 does not exist, matching
// how the other swapchain accessors treat a missing swapchain.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDialogBoxMode(BOOL bEnableDialogs) {
  D3D9DeviceLock lock = LockDevice();

  auto* swapchain = GetInternalSwapchain(0);

  // GetInternalSwapchain is null-checked by every other caller, so do
  // not dereference its result blindly here either.
  if (unlikely(swapchain == nullptr))
    return D3DERR_INVALIDCALL;

  return swapchain->SetDialogBoxMode(bEnableDialogs);
}
// Applies a gamma ramp to swapchain iSwapChain.
// Silently does nothing when no swapchain exists for that index.
void STDMETHODCALLTYPE D3D9DeviceEx::SetGammaRamp(
        UINT          iSwapChain,
        DWORD         Flags,
  const D3DGAMMARAMP* pRamp) {
  D3D9DeviceLock lock = LockDevice();

  auto* swapchain = GetInternalSwapchain(iSwapChain);

  if (swapchain == nullptr)
    return;

  swapchain->SetGammaRamp(Flags, pRamp);
}
// Reads the gamma ramp of swapchain iSwapChain into *pRamp.
// Silently does nothing when no swapchain exists for that index.
void STDMETHODCALLTYPE D3D9DeviceEx::GetGammaRamp(UINT iSwapChain, D3DGAMMARAMP* pRamp) {
  D3D9DeviceLock lock = LockDevice();

  auto* swapchain = GetInternalSwapchain(iSwapChain);

  if (swapchain == nullptr)
    return;

  swapchain->GetGammaRamp(pRamp);
}
// Creates a 2D texture.
//
// Returns D3DERR_INVALIDCALL when ppTexture is null or the description
// fails normalization, D3DERR_OUTOFVIDEOMEMORY when texture creation
// throws a DxvkError, D3DOK_NOAUTOGEN when the created texture's
// D3DUSAGE_AUTOGENMIPMAP bit differs from the requested one, and D3D_OK
// otherwise.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateTexture(
        UINT                Width,
        UINT                Height,
        UINT                Levels,
        DWORD               Usage,
        D3DFORMAT           Format,
        D3DPOOL             Pool,
        IDirect3DTexture9** ppTexture,
        HANDLE*             pSharedHandle) {
  InitReturnPtr(ppTexture);

  if (unlikely(ppTexture == nullptr))
    return D3DERR_INVALIDCALL;

  // Describe a single-slice, non-multisampled 2D image.
  D3D9_COMMON_TEXTURE_DESC desc;
  desc.Format             = EnumerateFormat(Format);
  desc.Pool               = Pool;
  desc.Usage              = Usage;
  desc.Width              = Width;
  desc.Height             = Height;
  desc.Depth              = 1;
  desc.ArraySize          = 1;
  desc.MipLevels          = Levels;
  desc.Discard            = FALSE;
  desc.MultiSample        = D3DMULTISAMPLE_NONE;
  desc.MultisampleQuality = 0;

  if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
    return D3DERR_INVALIDCALL;

  try {
    const Com<D3D9Texture2D> texture = new D3D9Texture2D(this, &desc);

    // For a single-level system-memory texture, a non-null shared handle
    // is read as a pointer to the initial data.
    const bool handleCarriesData = Pool == D3DPOOL_SYSTEMMEM
                                && Levels == 1
                                && pSharedHandle != nullptr;

    void* initialData = nullptr;

    if (handleCarriesData) {
      initialData = *(reinterpret_cast<void**>(pSharedHandle));
    } else {
      // This must be a shared resource.
      InitReturnPtr(pSharedHandle);
    }

    m_initializer->InitTexture(texture->GetCommonTexture(), initialData);
    *ppTexture = texture.ref();

    // Tell the caller when the autogen-mipmap request was not honored.
    const auto requestedAutogen = Usage & D3DUSAGE_AUTOGENMIPMAP;
    const auto actualAutogen    = texture->GetCommonTexture()->Desc()->Usage & D3DUSAGE_AUTOGENMIPMAP;

    if (requestedAutogen == actualAutogen)
      return D3D_OK;

    return D3DOK_NOAUTOGEN;
  }
  catch (const DxvkError& e) {
    Logger::err(e.message());
    return D3DERR_OUTOFVIDEOMEMORY;
  }
}
// Creates a volume (3D) texture.
//
// Returns D3DERR_INVALIDCALL when ppVolumeTexture is null or the
// description fails normalization, D3DERR_OUTOFVIDEOMEMORY when texture
// creation throws a DxvkError, D3DOK_NOAUTOGEN when the created texture's
// D3DUSAGE_AUTOGENMIPMAP bit differs from the requested one, and D3D_OK
// otherwise.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVolumeTexture(
        UINT                      Width,
        UINT                      Height,
        UINT                      Depth,
        UINT                      Levels,
        DWORD                     Usage,
        D3DFORMAT                 Format,
        D3DPOOL                   Pool,
        IDirect3DVolumeTexture9** ppVolumeTexture,
        HANDLE*                   pSharedHandle) {
  InitReturnPtr(ppVolumeTexture);
  InitReturnPtr(pSharedHandle);

  if (unlikely(ppVolumeTexture == nullptr))
    return D3DERR_INVALIDCALL;

  // Describe a single-slice, non-multisampled image with real depth.
  D3D9_COMMON_TEXTURE_DESC desc;
  desc.Format             = EnumerateFormat(Format);
  desc.Pool               = Pool;
  desc.Usage              = Usage;
  desc.Width              = Width;
  desc.Height             = Height;
  desc.Depth              = Depth;
  desc.ArraySize          = 1;
  desc.MipLevels          = Levels;
  desc.Discard            = FALSE;
  desc.MultiSample        = D3DMULTISAMPLE_NONE;
  desc.MultisampleQuality = 0;

  if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
    return D3DERR_INVALIDCALL;

  try {
    const Com<D3D9Texture3D> texture = new D3D9Texture3D(this, &desc);

    m_initializer->InitTexture(texture->GetCommonTexture());
    *ppVolumeTexture = texture.ref();

    // Tell the caller when the autogen-mipmap request was not honored.
    const auto requestedAutogen = Usage & D3DUSAGE_AUTOGENMIPMAP;
    const auto actualAutogen    = texture->GetCommonTexture()->Desc()->Usage & D3DUSAGE_AUTOGENMIPMAP;

    if (requestedAutogen == actualAutogen)
      return D3D_OK;

    return D3DOK_NOAUTOGEN;
  }
  catch (const DxvkError& e) {
    Logger::err(e.message());
    return D3DERR_OUTOFVIDEOMEMORY;
  }
}
// Creates a cube texture with square faces of EdgeLength texels.
//
// Returns D3DERR_INVALIDCALL when ppCubeTexture is null or the
// description fails normalization, D3DERR_OUTOFVIDEOMEMORY when texture
// creation throws a DxvkError, D3DOK_NOAUTOGEN when the created texture's
// D3DUSAGE_AUTOGENMIPMAP bit differs from the requested one, and D3D_OK
// otherwise.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateCubeTexture(
        UINT                    EdgeLength,
        UINT                    Levels,
        DWORD                   Usage,
        D3DFORMAT               Format,
        D3DPOOL                 Pool,
        IDirect3DCubeTexture9** ppCubeTexture,
        HANDLE*                 pSharedHandle) {
  InitReturnPtr(ppCubeTexture);
  InitReturnPtr(pSharedHandle);

  if (unlikely(ppCubeTexture == nullptr))
    return D3DERR_INVALIDCALL;

  // Describe six square, non-multisampled array slices, one per face.
  D3D9_COMMON_TEXTURE_DESC desc;
  desc.Format             = EnumerateFormat(Format);
  desc.Pool               = Pool;
  desc.Usage              = Usage;
  desc.Width              = EdgeLength;
  desc.Height             = EdgeLength;
  desc.Depth              = 1;
  desc.ArraySize          = 6; // A cube has 6 faces, wowwie!
  desc.MipLevels          = Levels;
  desc.Discard            = FALSE;
  desc.MultiSample        = D3DMULTISAMPLE_NONE;
  desc.MultisampleQuality = 0;

  if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
    return D3DERR_INVALIDCALL;

  try {
    const Com<D3D9TextureCube> texture = new D3D9TextureCube(this, &desc);

    m_initializer->InitTexture(texture->GetCommonTexture());
    *ppCubeTexture = texture.ref();

    // Tell the caller when the autogen-mipmap request was not honored.
    const auto requestedAutogen = Usage & D3DUSAGE_AUTOGENMIPMAP;
    const auto actualAutogen    = texture->GetCommonTexture()->Desc()->Usage & D3DUSAGE_AUTOGENMIPMAP;

    if (requestedAutogen == actualAutogen)
      return D3D_OK;

    return D3DOK_NOAUTOGEN;
  }
  catch (const DxvkError& e) {
    Logger::err(e.message());
    return D3DERR_OUTOFVIDEOMEMORY;
  }
}
// Creates a vertex buffer of Length bytes.
//
// Returns D3DERR_INVALIDCALL when ppVertexBuffer is null, when the
// description fails validation, or when buffer creation throws a
// DxvkError. pSharedHandle is not read by this implementation.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexBuffer(
        UINT                     Length,
        DWORD                    Usage,
        DWORD                    FVF,
        D3DPOOL                  Pool,
        IDirect3DVertexBuffer9** ppVertexBuffer,
        HANDLE*                  pSharedHandle) {
  InitReturnPtr(ppVertexBuffer);

  if (unlikely(ppVertexBuffer == nullptr))
    return D3DERR_INVALIDCALL;

  D3D9_BUFFER_DESC desc;
  desc.Type   = D3DRTYPE_VERTEXBUFFER;
  desc.Format = D3D9Format::VERTEXDATA;
  desc.FVF    = FVF;
  desc.Size   = Length;
  desc.Usage  = Usage;
  desc.Pool   = Pool;

  if (FAILED(D3D9CommonBuffer::ValidateBufferProperties(&desc)))
    return D3DERR_INVALIDCALL;

  try {
    const Com<D3D9VertexBuffer> vertexBuffer = new D3D9VertexBuffer(this, &desc);

    m_initializer->InitBuffer(vertexBuffer->GetCommonBuffer());
    *ppVertexBuffer = vertexBuffer.ref();
  }
  catch (const DxvkError & e) {
    Logger::err(e.message());
    return D3DERR_INVALIDCALL;
  }

  return D3D_OK;
}
// Creates an index buffer of Length bytes with the given index format.
//
// Returns D3DERR_INVALIDCALL when ppIndexBuffer is null, when the
// description fails validation, or when buffer creation throws a
// DxvkError. pSharedHandle is not read by this implementation.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateIndexBuffer(
        UINT                    Length,
        DWORD                   Usage,
        D3DFORMAT               Format,
        D3DPOOL                 Pool,
        IDirect3DIndexBuffer9** ppIndexBuffer,
        HANDLE*                 pSharedHandle) {
  InitReturnPtr(ppIndexBuffer);

  if (unlikely(ppIndexBuffer == nullptr))
    return D3DERR_INVALIDCALL;

  D3D9_BUFFER_DESC desc;
  desc.Type   = D3DRTYPE_INDEXBUFFER;
  desc.Format = EnumerateFormat(Format);
  desc.Size   = Length;
  desc.Usage  = Usage;
  desc.Pool   = Pool;

  if (FAILED(D3D9CommonBuffer::ValidateBufferProperties(&desc)))
    return D3DERR_INVALIDCALL;

  try {
    const Com<D3D9IndexBuffer> indexBuffer = new D3D9IndexBuffer(this, &desc);

    m_initializer->InitBuffer(indexBuffer->GetCommonBuffer());
    *ppIndexBuffer = indexBuffer.ref();
  }
  catch (const DxvkError & e) {
    Logger::err(e.message());
    return D3DERR_INVALIDCALL;
  }

  return D3D_OK;
}
// Non-Ex entry point: delegates to CreateRenderTargetEx with the trailing
// argument set to zero.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateRenderTarget(
        UINT                Width,
        UINT                Height,
        D3DFORMAT           Format,
        D3DMULTISAMPLE_TYPE MultiSample,
        DWORD               MultisampleQuality,
        BOOL                Lockable,
        IDirect3DSurface9** ppSurface,
        HANDLE*             pSharedHandle) {
  const HRESULT hr = CreateRenderTargetEx(
    Width, Height, Format,
    MultiSample, MultisampleQuality,
    Lockable,
    ppSurface, pSharedHandle,
    0);

  return hr;
}
// Non-Ex entry point: delegates to CreateDepthStencilSurfaceEx with the
// trailing argument set to zero.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateDepthStencilSurface(
        UINT                Width,
        UINT                Height,
        D3DFORMAT           Format,
        D3DMULTISAMPLE_TYPE MultiSample,
        DWORD               MultisampleQuality,
        BOOL                Discard,
        IDirect3DSurface9** ppSurface,
        HANDLE*             pSharedHandle) {
  const HRESULT hr = CreateDepthStencilSurfaceEx(
    Width, Height, Format,
    MultiSample, MultisampleQuality,
    Discard,
    ppSurface, pSharedHandle,
    0);

  return hr;
}
// Copies data from a D3DPOOL_SYSTEMMEM surface into a D3DPOOL_DEFAULT
// surface of the same format.
//
// pSourceSurface:      source surface (system memory pool)
// pSourceRect:         optional source rectangle; null selects the whole
//                      source mip level
// pDestinationSurface: destination surface (default pool)
// pDestPoint:          optional destination offset; null selects (0, 0)
//
// Returns D3DERR_INVALIDCALL when either surface is null, when the pools
// are not SYSTEMMEM -> DEFAULT, or when the formats differ. Otherwise the
// copy is queued on the CS thread and D3D_OK is returned.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::UpdateSurface(
IDirect3DSurface9* pSourceSurface,
const RECT* pSourceRect,
IDirect3DSurface9* pDestinationSurface,
const POINT* pDestPoint) {
D3D9DeviceLock lock = LockDevice();
D3D9Surface* src = static_cast<D3D9Surface*>(pSourceSurface);
D3D9Surface* dst = static_cast<D3D9Surface*>(pDestinationSurface);
if (unlikely(src == nullptr || dst == nullptr))
return D3DERR_INVALIDCALL;
D3D9CommonTexture* srcTextureInfo = src->GetCommonTexture();
D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture();
if (unlikely(srcTextureInfo->Desc()->Pool != D3DPOOL_SYSTEMMEM || dstTextureInfo->Desc()->Pool != D3DPOOL_DEFAULT))
return D3DERR_INVALIDCALL;
if (unlikely(srcTextureInfo->Desc()->Format != dstTextureInfo->Desc()->Format))
return D3DERR_INVALIDCALL;
const DxvkFormatInfo* formatInfo = imageFormatInfo(dstTextureInfo->GetFormatMapping().FormatColor);
// Defaults: copy the whole source mip level to the destination origin.
VkOffset3D srcBlockOffset = { 0u, 0u, 0u };
VkOffset3D dstOffset = { 0u, 0u, 0u };
VkExtent3D copyExtent = srcTextureInfo->GetExtentMip(src->GetSubresource());
if (pSourceRect != nullptr) {
// The source offset is expressed in blocks, while the copy extent is
// the rectangle size in texels rounded down to whole blocks.
srcBlockOffset = { pSourceRect->left / int32_t(formatInfo->blockSize.width),
pSourceRect->top / int32_t(formatInfo->blockSize.height),
0u };
copyExtent = { alignDown(uint32_t(pSourceRect->right - pSourceRect->left), formatInfo->blockSize.width),
alignDown(uint32_t(pSourceRect->bottom - pSourceRect->top), formatInfo->blockSize.height),
1u };
}
if (pDestPoint != nullptr) {
// The destination offset stays in texels, rounded down to a block
// boundary.
dstOffset = { alignDown(pDestPoint->x, formatInfo->blockSize.width),
alignDown(pDestPoint->y, formatInfo->blockSize.height),
0u };
}
const auto dstSubresource = vk::makeSubresourceLayers(
dstTextureInfo->GetSubresourceFromIndex(VK_IMAGE_ASPECT_COLOR_BIT, dst->GetSubresource()));
Rc<DxvkBuffer> srcBuffer = srcTextureInfo->GetBuffer(src->GetSubresource());
Rc<DxvkImage> dstImage = dstTextureInfo->GetImage();
VkExtent3D levelExtent = srcTextureInfo->GetExtentMip(src->GetSubresource());
VkExtent3D blockCount = util::computeBlockCount(levelExtent, formatInfo->blockSize);
// Byte offset of the first copied block inside the source buffer,
// computed with a row stride of blockCount.width elements.
VkDeviceSize srcByteOffset = srcBlockOffset.y * formatInfo->elementSize * blockCount.width
+ srcBlockOffset.x * formatInfo->elementSize;
// Full size of the source level in texels, rounded up to whole blocks.
VkExtent2D fullSrcExtent = VkExtent2D{ blockCount.width * formatInfo->blockSize.width,
blockCount.height * formatInfo->blockSize.height };
EmitCs([
cDstImage = std::move(dstImage),
cSrcBuffer = std::move(srcBuffer),
cDstLayers = dstSubresource,
cDstOffset = dstOffset,
cSrcOffset = srcByteOffset,
cCopyExtent = copyExtent,
cSrcExtent = fullSrcExtent
] (DxvkContext* ctx) {
ctx->copyBufferToImage(
cDstImage, cDstLayers, cDstOffset, cCopyExtent,
cSrcBuffer, cSrcOffset,
cSrcExtent);
});
// Flag the written subresource as dirty and regenerate the mip chain
// for textures that report automatic mip generation.
dstTextureInfo->SetDirty(dst->GetSubresource(), true);
if (dstTextureInfo->IsAutomaticMip())
GenerateMips(dstTextureInfo);
return D3D_OK;
}
// Uploads every shared subresource of a D3DPOOL_SYSTEMMEM texture into a
// D3DPOOL_DEFAULT texture.
//
// Returns D3DERR_INVALIDCALL when either texture is null or the pools are
// not SYSTEMMEM -> DEFAULT. Otherwise one buffer-to-image copy is queued
// per array slice and mip level that both textures have, the destination
// is marked dirty, its mip sublevels are regenerated and an implicit
// flush is issued.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::UpdateTexture(
        IDirect3DBaseTexture9* pSourceTexture,
        IDirect3DBaseTexture9* pDestinationTexture) {
  D3D9DeviceLock lock = LockDevice();

  if (pDestinationTexture == nullptr || pSourceTexture == nullptr)
    return D3DERR_INVALIDCALL;

  D3D9CommonTexture* dstTexInfo = GetCommonTexture(pDestinationTexture);
  D3D9CommonTexture* srcTexInfo = GetCommonTexture(pSourceTexture);

  const bool poolsValid = srcTexInfo->Desc()->Pool == D3DPOOL_SYSTEMMEM
                       && dstTexInfo->Desc()->Pool == D3DPOOL_DEFAULT;

  if (unlikely(!poolsValid))
    return D3DERR_INVALIDCALL;

  const Rc<DxvkImage> dstImage = dstTexInfo->GetImage();

  // Only copy what exists in both textures.
  uint32_t sharedLevels = std::min(srcTexInfo->Desc()->MipLevels, dstTexInfo->Desc()->MipLevels);
  uint32_t sharedSlices = std::min(srcTexInfo->Desc()->ArraySize, dstTexInfo->Desc()->ArraySize);

  for (uint32_t slice = 0; slice < sharedSlices; slice++) {
    for (uint32_t level = 0; level < sharedLevels; level++) {
      Rc<DxvkBuffer> srcBuffer = srcTexInfo->GetBuffer(srcTexInfo->CalcSubresource(slice, level));

      VkImageSubresourceLayers dstLayers;
      dstLayers.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT;
      dstLayers.mipLevel       = level;
      dstLayers.baseArrayLayer = slice;
      dstLayers.layerCount     = 1;

      VkExtent3D levelExtent = dstImage->mipLevelExtent(level);

      EmitCs([
        cDstImage  = dstImage,
        cSrcBuffer = srcBuffer,
        cDstLayers = dstLayers,
        cExtent    = levelExtent
      ] (DxvkContext* ctx) {
        ctx->copyBufferToImage(
          cDstImage, cDstLayers,
          VkOffset3D{ 0, 0, 0 }, cExtent,
          cSrcBuffer, 0, { 0u, 0u });
      });
    }
  }

  dstTexInfo->MarkAllDirty();
  pDestinationTexture->GenerateMipSubLevels();

  FlushImplicit(false);

  return D3D_OK;
}
// Reads the contents of a render target back into another surface.
//
// Returns D3DERR_INVALIDCALL when either surface is null or the formats
// differ, and D3D_OK immediately when both arguments are the same
// surface. A D3DPOOL_DEFAULT destination is handled by StretchRect.
// Otherwise an image-to-buffer copy is queued and the call blocks until
// the destination buffer is no longer in use.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderTargetData(
        IDirect3DSurface9* pRenderTarget,
        IDirect3DSurface9* pDestSurface) {
  D3D9DeviceLock lock = LockDevice();

  D3D9Surface* src = static_cast<D3D9Surface*>(pRenderTarget);
  D3D9Surface* dst = static_cast<D3D9Surface*>(pDestSurface);

  if (unlikely(src == nullptr || dst == nullptr))
    return D3DERR_INVALIDCALL;

  // Copying a surface onto itself needs no work.
  if (pRenderTarget == pDestSurface)
    return D3D_OK;

  D3D9CommonTexture* readbackTex = GetCommonTexture(dst);
  D3D9CommonTexture* renderTex   = GetCommonTexture(src);

  if (renderTex->Desc()->Format != readbackTex->Desc()->Format)
    return D3DERR_INVALIDCALL;

  if (readbackTex->Desc()->Pool == D3DPOOL_DEFAULT)
    return this->StretchRect(pRenderTarget, nullptr, pDestSurface, nullptr, D3DTEXF_NONE);

  Rc<DxvkBuffer> dstBuffer = readbackTex->GetBuffer(dst->GetSubresource());
  Rc<DxvkImage>  srcImage  = renderTex->GetImage();

  const DxvkFormatInfo* srcFormatInfo = imageFormatInfo(srcImage->info().format);

  const VkImageSubresource srcSubresource = renderTex->GetSubresourceFromIndex(
    srcFormatInfo->aspectMask, src->GetSubresource());

  VkImageSubresourceLayers srcLayers;
  srcLayers.aspectMask     = srcSubresource.aspectMask;
  srcLayers.mipLevel       = srcSubresource.mipLevel;
  srcLayers.baseArrayLayer = srcSubresource.arrayLayer;
  srcLayers.layerCount     = 1;

  VkExtent3D srcExtent = renderTex->GetExtentMip(src->GetMipLevel());

  EmitCs([
    cBuffer       = dstBuffer,
    cImage        = srcImage,
    cSubresources = srcLayers,
    cLevelExtent  = srcExtent
  ] (DxvkContext* ctx) {
    ctx->copyImageToBuffer(
      cBuffer, 0, VkExtent2D { 0u, 0u },
      cImage, cSubresources, VkOffset3D { 0, 0, 0 },
      cLevelExtent);
  });

  // We need to force a wait here
  // as some applications depend on
  // DO_NOT_WAIT not applying after
  // this has happened.
  // (this is a blocking call)
  WaitForResource(dstBuffer, 0);

  return D3D_OK;
}
// Asks swapchain iSwapChain to copy its front buffer into pDestSurface.
// Returns D3DERR_INVALIDCALL when no swapchain exists for that index.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFrontBufferData(UINT iSwapChain, IDirect3DSurface9* pDestSurface) {
  D3D9DeviceLock lock = LockDevice();

  auto* swapchain = GetInternalSwapchain(iSwapChain);

  if (swapchain == nullptr)
    return D3DERR_INVALIDCALL;

  return swapchain->GetFrontBufferData(pDestSurface);
}
// Copies a rectangle from one surface to another, resolving and/or
// scaling as required.
//
// pSourceSurface / pDestSurface: surfaces to copy between; must both be
//                                non-null and distinct
// pSourceRect / pDestRect:       optional rectangles; null selects the
//                                whole mip level of the respective surface
// Filter:                        filter used only when the copy stretches
//
// A "fast path" (plain image copy, or a direct resolve for multisampled
// sources) is taken when the formats are similar, the destination is
// single-sampled, the rectangles are block aligned and the source and
// destination extents match. Everything else goes through a blit,
// preceded by a resolve into the source's resolve image when the source
// is multisampled.
//
// Returns D3DERR_INVALIDCALL for null or identical surfaces, D3D_OK
// otherwise.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::StretchRect(
IDirect3DSurface9* pSourceSurface,
const RECT* pSourceRect,
IDirect3DSurface9* pDestSurface,
const RECT* pDestRect,
D3DTEXTUREFILTERTYPE Filter) {
D3D9DeviceLock lock = LockDevice();
D3D9Surface* dst = static_cast<D3D9Surface*>(pDestSurface);
D3D9Surface* src = static_cast<D3D9Surface*>(pSourceSurface);
if (unlikely(src == nullptr || dst == nullptr))
return D3DERR_INVALIDCALL;
if (unlikely(src == dst))
return D3DERR_INVALIDCALL;
// Starts out true and is cleared by every condition below that rules
// out a plain copy/resolve.
bool fastPath = true;
D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture();
D3D9CommonTexture* srcTextureInfo = src->GetCommonTexture();
Rc<DxvkImage> dstImage = dstTextureInfo->GetImage();
Rc<DxvkImage> srcImage = srcTextureInfo->GetImage();
const DxvkFormatInfo* dstFormatInfo = imageFormatInfo(dstImage->info().format);
const DxvkFormatInfo* srcFormatInfo = imageFormatInfo(srcImage->info().format);
const VkImageSubresource dstSubresource = dstTextureInfo->GetSubresourceFromIndex(dstFormatInfo->aspectMask, dst->GetSubresource());
const VkImageSubresource srcSubresource = srcTextureInfo->GetSubresourceFromIndex(srcFormatInfo->aspectMask, src->GetSubresource());
VkExtent3D srcExtent = srcImage->mipLevelExtent(srcSubresource.mipLevel);
VkExtent3D dstExtent = dstImage->mipLevelExtent(dstSubresource.mipLevel);
D3D9Format srcFormat = srcTextureInfo->Desc()->Format;
D3D9Format dstFormat = dstTextureInfo->Desc()->Format;
// We may only fast path copy non identicals one way!
// We don't know what garbage could be in the X8 data.
bool similar = AreFormatsSimilar(srcFormat, dstFormat);
// Copies are only supported on similar formats.
fastPath &= similar;
// Copies are only supported if the sample count matches,
// otherwise we need to resolve.
bool needsResolve = srcImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT;
// A multisampled destination always rules out the fast path.
bool fbBlit = dstImage->info().sampleCount != VK_SAMPLE_COUNT_1_BIT;
fastPath &= !fbBlit;
// Copies would only work if we are block aligned.
if (pSourceRect != nullptr) {
fastPath &= (pSourceRect->left % srcFormatInfo->blockSize.width == 0);
fastPath &= (pSourceRect->right % srcFormatInfo->blockSize.width == 0);
fastPath &= (pSourceRect->top % srcFormatInfo->blockSize.height == 0);
fastPath &= (pSourceRect->bottom % srcFormatInfo->blockSize.height == 0);
}
// Only the top-left corner of the destination rectangle is checked.
if (pDestRect != nullptr) {
fastPath &= (pDestRect->left % dstFormatInfo->blockSize.width == 0);
fastPath &= (pDestRect->top % dstFormatInfo->blockSize.height == 0);
}
VkImageSubresourceLayers dstSubresourceLayers = {
dstSubresource.aspectMask,
dstSubresource.mipLevel,
dstSubresource.arrayLayer, 1 };
VkImageSubresourceLayers srcSubresourceLayers = {
srcSubresource.aspectMask,
srcSubresource.mipLevel,
srcSubresource.arrayLayer, 1 };
// Build the blit region. A null rectangle selects the whole mip level
// of the corresponding image.
VkImageBlit blitInfo;
blitInfo.dstSubresource = dstSubresourceLayers;
blitInfo.srcSubresource = srcSubresourceLayers;
blitInfo.dstOffsets[0] = pDestRect != nullptr
? VkOffset3D{ int32_t(pDestRect->left), int32_t(pDestRect->top), 0 }
: VkOffset3D{ 0, 0, 0 };
blitInfo.dstOffsets[1] = pDestRect != nullptr
? VkOffset3D{ int32_t(pDestRect->right), int32_t(pDestRect->bottom), 1 }
: VkOffset3D{ int32_t(dstExtent.width), int32_t(dstExtent.height), 1 };
blitInfo.srcOffsets[0] = pSourceRect != nullptr
? VkOffset3D{ int32_t(pSourceRect->left), int32_t(pSourceRect->top), 0 }
: VkOffset3D{ 0, 0, 0 };
blitInfo.srcOffsets[1] = pSourceRect != nullptr
? VkOffset3D{ int32_t(pSourceRect->right), int32_t(pSourceRect->bottom), 1 }
: VkOffset3D{ int32_t(srcExtent.width), int32_t(srcExtent.height), 1 };
VkExtent3D srcCopyExtent =
{ uint32_t(blitInfo.srcOffsets[1].x - blitInfo.srcOffsets[0].x),
uint32_t(blitInfo.srcOffsets[1].y - blitInfo.srcOffsets[0].y),
uint32_t(blitInfo.srcOffsets[1].z - blitInfo.srcOffsets[0].z) };
VkExtent3D dstCopyExtent =
{ uint32_t(blitInfo.dstOffsets[1].x - blitInfo.dstOffsets[0].x),
uint32_t(blitInfo.dstOffsets[1].y - blitInfo.dstOffsets[0].y),
uint32_t(blitInfo.dstOffsets[1].z - blitInfo.dstOffsets[0].z) };
// Copies would only work if the extents match. (ie. no stretching)
bool stretch = srcCopyExtent != dstCopyExtent;
fastPath &= !stretch;
if (fastPath) {
// Fast path: resolve directly into the destination when the source is
// multisampled, otherwise do a plain image copy.
if (needsResolve) {
VkImageResolve region;
region.srcSubresource = blitInfo.srcSubresource;
region.srcOffset = blitInfo.srcOffsets[0];
region.dstSubresource = blitInfo.dstSubresource;
region.dstOffset = blitInfo.dstOffsets[0];
region.extent = srcCopyExtent;
EmitCs([
cDstImage = dstImage,
cSrcImage = srcImage,
cRegion = region
] (DxvkContext* ctx) {
ctx->resolveImage(
cDstImage, cSrcImage, cRegion,
VK_FORMAT_UNDEFINED);
});
} else {
EmitCs([
cDstImage = dstImage,
cSrcImage = srcImage,
cDstLayers = blitInfo.dstSubresource,
cSrcLayers = blitInfo.srcSubresource,
cDstOffset = blitInfo.dstOffsets[0],
cSrcOffset = blitInfo.srcOffsets[0],
cExtent = srcCopyExtent
] (DxvkContext* ctx) {
ctx->copyImage(
cDstImage, cDstLayers, cDstOffset,
cSrcImage, cSrcLayers, cSrcOffset,
cExtent);
});
}
}
else {
// Slow path: a multisampled source is first resolved into its resolve
// image, using the same subresource and offset on both sides, and the
// blit then reads from that resolve image.
if (needsResolve) {
auto resolveSrc = srcTextureInfo->GetResolveImage();
VkImageResolve region;
region.srcSubresource = blitInfo.srcSubresource;
region.srcOffset = blitInfo.srcOffsets[0];
region.dstSubresource = blitInfo.srcSubresource;
region.dstOffset = blitInfo.srcOffsets[0];
region.extent = srcCopyExtent;
EmitCs([
cDstImage = resolveSrc,
cSrcImage = srcImage,
cRegion = region
] (DxvkContext* ctx) {
ctx->resolveImage(
cDstImage, cSrcImage, cRegion,
VK_FORMAT_UNDEFINED);
});
srcImage = resolveSrc;
}
// The requested filter only applies when the blit actually stretches;
// otherwise nearest filtering is used.
EmitCs([
cDstImage = dstImage,
cDstMap = dstTextureInfo->GetMapping().Swizzle,
cSrcImage = srcImage,
cSrcMap = srcTextureInfo->GetMapping().Swizzle,
cBlitInfo = blitInfo,
cFilter = stretch ? DecodeFilter(Filter) : VK_FILTER_NEAREST
] (DxvkContext* ctx) {
ctx->blitImage(
cDstImage,
cDstMap,
cSrcImage,
cSrcMap,
cBlitInfo,
cFilter);
});
}
// Flag the written destination subresource as dirty.
dstTextureInfo->SetDirty(dst->GetSubresource(), true);
return D3D_OK;
}
// Fills a surface, or a rectangle of it, with a solid color.
//
// pSurface: surface to fill; null yields D3DERR_INVALIDCALL
// pRect:    optional rectangle; null fills the whole mip level
// Color:    fill color, decoded into a float clear value
//
// A fill covering the whole mip level of a surface that has a render
// target view is issued as a render target clear. Every other case
// clears the given region of the image view.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ColorFill(
        IDirect3DSurface9* pSurface,
  const RECT*              pRect,
        D3DCOLOR           Color) {
  D3D9DeviceLock lock = LockDevice();

  D3D9Surface* dst = static_cast<D3D9Surface*>(pSurface);

  if (unlikely(dst == nullptr))
    return D3DERR_INVALIDCALL;

  D3D9CommonTexture* dstTextureInfo = dst->GetCommonTexture();

  VkExtent3D levelExtent = dstTextureInfo->GetExtentMip(dst->GetSubresource());

  // Start from the full level and narrow down to pRect when given.
  VkOffset3D fillOffset = VkOffset3D{ 0, 0, 0 };
  VkExtent3D fillExtent = levelExtent;

  if (pRect != nullptr)
    ConvertRect(*pRect, fillOffset, fillExtent);

  const bool coversLevel = pRect == nullptr
    || (fillOffset == VkOffset3D{ 0, 0, 0 } && fillExtent == levelExtent);

  Rc<DxvkImageView> imageView        = dst->GetImageView(false);
  Rc<DxvkImageView> renderTargetView = dst->GetRenderTargetView(false);

  VkClearValue clearValue;
  DecodeD3DCOLOR(Color, clearValue.color.float32);

  // Fast path for games that may use this as an
  // alternative to Clear on render targets.
  const bool useRenderTargetClear = coversLevel && renderTargetView != nullptr;

  if (!useRenderTargetClear) {
    EmitCs([
      cImageView  = imageView,
      cOffset     = fillOffset,
      cExtent     = fillExtent,
      cClearValue = clearValue
    ] (DxvkContext* ctx) {
      ctx->clearImageView(
        cImageView,
        cOffset, cExtent,
        VK_IMAGE_ASPECT_COLOR_BIT,
        cClearValue);
    });
  } else {
    EmitCs([
      cImageView  = renderTargetView,
      cClearValue = clearValue
    ] (DxvkContext* ctx) {
      ctx->clearRenderTarget(
        cImageView,
        VK_IMAGE_ASPECT_COLOR_BIT,
        cClearValue);
    });
  }

  dstTextureInfo->SetDirty(dst->GetSubresource(), true);

  return D3D_OK;
}
// Non-Ex entry point: delegates to CreateOffscreenPlainSurfaceEx with the
// trailing argument set to zero.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateOffscreenPlainSurface(
        UINT                Width,
        UINT                Height,
        D3DFORMAT           Format,
        D3DPOOL             Pool,
        IDirect3DSurface9** ppSurface,
        HANDLE*             pSharedHandle) {
  const HRESULT hr = CreateOffscreenPlainSurfaceEx(
    Width,
    Height,
    Format,
    Pool,
    ppSurface,
    pSharedHandle,
    0);

  return hr;
}
// Binds pRenderTarget to render target slot RenderTargetIndex.
//
// Returns D3DERR_INVALIDCALL when the index is out of range, when slot 0
// would be set to null, or when the surface lacks D3DUSAGE_RENDERTARGET.
//
// Binding slot 0 resets the viewport and scissor rectangle to cover the
// whole surface. This happens before the "already bound" early-out, so
// it also applies when the same surface is bound again.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetRenderTarget(
DWORD RenderTargetIndex,
IDirect3DSurface9* pRenderTarget) {
D3D9DeviceLock lock = LockDevice();
if (unlikely(RenderTargetIndex >= caps::MaxSimultaneousRenderTargets
|| (pRenderTarget == nullptr && RenderTargetIndex == 0)))
return D3DERR_INVALIDCALL;
D3D9Surface* rt = static_cast<D3D9Surface*>(pRenderTarget);
// desc stays null when the slot is being cleared.
const auto* desc = rt != nullptr
? rt->GetCommonTexture()->Desc()
: nullptr;
if (unlikely(desc && !(desc->Usage & D3DUSAGE_RENDERTARGET)))
return D3DERR_INVALIDCALL;
if (RenderTargetIndex == 0) {
// rt is non-null here: a null surface for slot 0 was rejected above.
auto rtSize = rt->GetSurfaceExtent();
D3DVIEWPORT9 viewport;
viewport.X = 0;
viewport.Y = 0;
viewport.Width = rtSize.width;
viewport.Height = rtSize.height;
viewport.MinZ = 0.0f;
viewport.MaxZ = 1.0f;
RECT scissorRect;
scissorRect.left = 0;
scissorRect.top = 0;
scissorRect.right = rtSize.width;
scissorRect.bottom = rtSize.height;
// Only raise the dirty flags when the stored state actually changes.
if (m_state.viewport != viewport) {
m_flags.set(D3D9DeviceFlag::DirtyFFViewport);
m_flags.set(D3D9DeviceFlag::DirtyPointScale);
m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
m_state.viewport = viewport;
}
if (m_state.scissorRect != scissorRect) {
m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
m_state.scissorRect = scissorRect;
}
}
// Nothing else to do when the surface is already bound to this slot.
if (m_state.renderTargets[RenderTargetIndex] == rt)
return D3D_OK;
// Do a strong flush if the first render target is changed.
FlushImplicit(RenderTargetIndex == 0 ? TRUE : FALSE);
m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
m_state.renderTargets[RenderTargetIndex] = rt;
UpdateActiveRTs(RenderTargetIndex);
// m_alphaSwizzleRTs holds one bit per slot, set when the bound
// surface's mapping swizzles alpha to ONE. Blend state is marked dirty
// whenever the mask changes.
uint32_t originalAlphaSwizzleRTs = m_alphaSwizzleRTs;
m_alphaSwizzleRTs &= ~(1 << RenderTargetIndex);
if (rt != nullptr && rt->GetCommonTexture()->GetMapping().Swizzle.a == VK_COMPONENT_SWIZZLE_ONE)
m_alphaSwizzleRTs |= 1 << RenderTargetIndex;
if (originalAlphaSwizzleRTs != m_alphaSwizzleRTs)
m_flags.set(D3D9DeviceFlag::DirtyBlendState);
if (RenderTargetIndex == 0) {
// desc is non-null here for the same reason rt is. The ValidSampleMask
// flag follows whether slot 0 uses a multisample type above
// D3DMULTISAMPLE_NONMASKABLE.
bool validSampleMask = desc->MultiSample > D3DMULTISAMPLE_NONMASKABLE;
if (validSampleMask != m_flags.test(D3D9DeviceFlag::ValidSampleMask)) {
m_flags.clr(D3D9DeviceFlag::ValidSampleMask);
if (validSampleMask)
m_flags.set(D3D9DeviceFlag::ValidSampleMask);
m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
}
}
return D3D_OK;
}
// Returns a new reference to the surface bound to render target slot
// RenderTargetIndex.
//
// Returns D3DERR_INVALIDCALL when ppRenderTarget is null or the index is
// out of range, and D3DERR_NOTFOUND when nothing is bound to the slot.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderTarget(
        DWORD               RenderTargetIndex,
        IDirect3DSurface9** ppRenderTarget) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(ppRenderTarget);

  // Valid slots are [0, MaxSimultaneousRenderTargets). The index equal to
  // the maximum must be rejected too, exactly as SetRenderTarget does,
  // otherwise the array access below reads one slot out of range.
  if (unlikely(ppRenderTarget == nullptr || RenderTargetIndex >= caps::MaxSimultaneousRenderTargets))
    return D3DERR_INVALIDCALL;

  if (m_state.renderTargets[RenderTargetIndex] == nullptr)
    return D3DERR_NOTFOUND;

  *ppRenderTarget = m_state.renderTargets[RenderTargetIndex].ref();

  return D3D_OK;
}
// Binds pNewZStencil as the depth-stencil surface; null unbinds it.
//
// Returns D3DERR_INVALIDCALL when a non-null surface lacks
// D3DUSAGE_DEPTHSTENCIL. Rebinding the current surface is a no-op.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetDepthStencilSurface(IDirect3DSurface9* pNewZStencil) {
  D3D9DeviceLock lock = LockDevice();

  D3D9Surface* ds = static_cast<D3D9Surface*>(pNewZStencil);

  const bool hasSurface = ds != nullptr;

  if (unlikely(hasSurface && !(ds->GetCommonTexture()->Desc()->Usage & D3DUSAGE_DEPTHSTENCIL)))
    return D3DERR_INVALIDCALL;

  if (m_state.depthStencil == ds)
    return D3D_OK;

  FlushImplicit(FALSE);
  m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);

  if (hasSurface) {
    // The depth bias scale is derived from the new surface's format and
    // is only marked dirty when it actually changes.
    const float biasScale = GetDepthBufferRValue(ds->GetCommonTexture()->GetFormatMapping().FormatColor);

    if (m_depthBiasScale != biasScale) {
      m_depthBiasScale = biasScale;
      m_flags.set(D3D9DeviceFlag::DirtyDepthBias);
    }
  }

  m_state.depthStencil = ds;

  return D3D_OK;
}
// Hands out a new reference to the bound depth-stencil surface.
// Returns D3DERR_INVALIDCALL for a null output pointer and
// D3DERR_NOTFOUND when no depth-stencil surface is bound.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDepthStencilSurface(IDirect3DSurface9** ppZStencilSurface) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(ppZStencilSurface);

  if (unlikely(ppZStencilSurface == nullptr))
    return D3DERR_INVALIDCALL;

  auto& bound = m_state.depthStencil;

  if (bound == nullptr)
    return D3DERR_NOTFOUND;

  *ppZStencilSurface = bound.ref();
  return D3D_OK;
}
// BeginScene performs no work and always succeeds; EndScene only
// triggers an implicit flush. No scene state is tracked between the two.
// Some games don't even call them.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::BeginScene() {
return D3D_OK;
}
// Marks the end of a scene. The only action taken is an implicit flush
// requested with the argument set to true; the call always succeeds.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndScene() {
FlushImplicit(true);
return D3D_OK;
}
// Clears the bound render targets and/or the depth-stencil surface.
//
// Count / pRects: optional list of rectangles to clear. When Count is zero,
//                 pRects is null, or the first rectangle covers the whole
//                 clear region, the entire viewport (clipped by the scissor
//                 rect if scissor testing is enabled) is cleared instead.
// Flags:          combination of D3DCLEAR_TARGET / ZBUFFER / STENCIL.
// Color, Z, Stencil: clear values for the respective aspects.
//
// Always returns D3D_OK.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::Clear(
        DWORD    Count,
  const D3DRECT* pRects,
        DWORD    Flags,
        D3DCOLOR Color,
        float    Z,
        DWORD    Stencil) {
  D3D9DeviceLock lock = LockDevice();

  const auto& vp = m_state.viewport;
  const auto& sc = m_state.scissorRect;

  bool srgb    = m_state.renderStates[D3DRS_SRGBWRITEENABLE];
  bool scissor = m_state.renderStates[D3DRS_SCISSORTESTENABLE];

  VkOffset3D offset = { int32_t(vp.X), int32_t(vp.Y), 0  };
  VkExtent3D extent = { vp.Width,      vp.Height,     1u };

  // Clip the clear region against the scissor rect if it is active.
  if (scissor) {
    offset.x = std::max<int32_t> (offset.x, sc.left);
    offset.y = std::max<int32_t> (offset.y, sc.top);

    extent.width  = std::min<uint32_t>(extent.width,  sc.right  - offset.x);
    extent.height = std::min<uint32_t>(extent.height, sc.bottom - offset.y);
  }

  // This becomes pretty unreadable in one singular if statement...
  if (Count) {
    // If pRects is null, or our first rect encompasses the viewport:
    if (!pRects)
      Count = 0;
    else if (pRects[0].x1 <= offset.x                         && pRects[0].y1 <= offset.y
          && pRects[0].x2 >= offset.x + int32_t(extent.width) && pRects[0].y2 >= offset.y + int32_t(extent.height))
      Count = 0;
  }

  // Here, Count of 0 will denote whether or not to care about user rects.

  VkClearValue clearValueDepth;
  clearValueDepth.depthStencil.depth   = Z;
  clearValueDepth.depthStencil.stencil = Stencil;

  VkClearValue clearValueColor;
  DecodeD3DCOLOR(Color, clearValueColor.color.float32);

  auto dsv = m_state.depthStencil != nullptr ? m_state.depthStencil->GetDepthStencilView() : nullptr;
  VkImageAspectFlags depthAspectMask = 0;
  if (dsv != nullptr) {
    if (Flags & D3DCLEAR_ZBUFFER)
      depthAspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;

    if (Flags & D3DCLEAR_STENCIL)
      depthAspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;

    // Only keep the aspects that the bound depth-stencil format provides.
    depthAspectMask &= imageFormatInfo(dsv->info().format)->aspectMask;
  }

  // Emits either a full render target clear or a clear of a sub-region.
  auto ClearImageView = [this](
    bool               fullClear,
    VkOffset3D         offset,
    VkExtent3D         extent,
    Rc<DxvkImageView>  imageView,
    VkImageAspectFlags aspectMask,
    VkClearValue       clearValue) {
    if (fullClear) {
      EmitCs([
        cClearValue = clearValue,
        cAspectMask = aspectMask,
        cImageView  = imageView
      ] (DxvkContext* ctx) {
        ctx->clearRenderTarget(
          cImageView,
          cAspectMask,
          cClearValue);
      });
    }
    else {
      EmitCs([
        cClearValue = clearValue,
        cAspectMask = aspectMask,
        cImageView  = imageView,
        cOffset     = offset,
        cExtent     = extent
      ] (DxvkContext* ctx) {
        ctx->clearImageView(
          cImageView,
          cOffset, cExtent,
          cAspectMask,
          cClearValue);
      });
    }
  };

  // Clears one region on every requested attachment.
  auto ClearViewRect = [&](
    bool       fullClear,
    VkOffset3D offset,
    VkExtent3D extent) {
    // Clear depth if we need to.
    if (depthAspectMask != 0)
      ClearImageView(fullClear, offset, extent, dsv, depthAspectMask, clearValueDepth);

    // Clear render targets if we need to.
    if (Flags & D3DCLEAR_TARGET) {
      for (auto rt : m_state.renderTargets) {
        auto rtv = rt != nullptr ? rt->GetRenderTargetView(srgb) : nullptr;

        if (unlikely(rtv != nullptr))
          ClearImageView(fullClear, offset, extent, rtv, VK_IMAGE_ASPECT_COLOR_BIT, clearValueColor);
      }
    }
  };

  // A Hat in Time and other UE3 games only gets partial clears here
  // because of an oversized rt height due to their weird alignment...
  // This works around that.
  uint32_t alignment = m_d3d9Options.lenientClear ? 8 : 1;

  auto rtSize = m_state.renderTargets[0]->GetSurfaceExtent();

  bool extentMatches = align(extent.width,  alignment) == align(rtSize.width,  alignment)
                    && align(extent.height, alignment) == align(rtSize.height, alignment);

  bool rtSizeMatchesClearSize = offset.x == 0 && offset.y == 0 && extentMatches;

  if (likely(!Count && rtSizeMatchesClearSize)) {
    // Fast path w/ ClearRenderTarget for when
    // our viewport and scissor match the RT size
    ClearViewRect(true, offset, extent);
  }
  else if (!Count) {
    // Clear our viewport & scissor minified region in this rendertarget.
    ClearViewRect(false, offset, extent);
  }
  else {
    // Clear the application provided rects, each clipped to the clear region.
    for (uint32_t i = 0; i < Count; i++) {
      const int32_t left   = std::max<int32_t>(pRects[i].x1, offset.x);
      const int32_t top    = std::max<int32_t>(pRects[i].y1, offset.y);
      const int32_t right  = std::min<int32_t>(pRects[i].x2, offset.x + int32_t(extent.width));
      const int32_t bottom = std::min<int32_t>(pRects[i].y2, offset.y + int32_t(extent.height));

      // A rect that does not intersect the clear region has nothing to
      // clear. Skipping it also avoids an unsigned underflow when the
      // extent is computed, which would otherwise yield a huge clear area.
      if (right <= left || bottom <= top)
        continue;

      VkOffset3D rectOffset = { left, top, 0 };

      VkExtent3D rectExtent = {
        uint32_t(right  - left),
        uint32_t(bottom - top),
        1u
      };

      ClearViewRect(false, rectOffset, rectExtent);
    }
  }

  return D3D_OK;
}
// Translates the D3D9 transform state type into the internal transform
// index and forwards the matrix to SetStateTransform.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX* pMatrix) {
  const uint32_t transformIndex = GetTransformIndex(State);

  return SetStateTransform(transformIndex, pMatrix);
}
// Copies the stored transform for the given state type into *pMatrix.
// Returns D3DERR_INVALIDCALL if pMatrix is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTransform(D3DTRANSFORMSTATETYPE State, D3DMATRIX* pMatrix) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(pMatrix == nullptr))
    return D3DERR_INVALIDCALL;

  const auto& stored = m_state.transforms[GetTransformIndex(State)];
  *pMatrix = bit::cast<D3DMATRIX>(stored);

  return D3D_OK;
}
// Pre-multiplies the stored transform by the given matrix
// (new = pMatrix * current). While a state block is being
// recorded the call is forwarded to the recorder instead.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::MultiplyTransform(D3DTRANSFORMSTATETYPE TransformState, const D3DMATRIX* pMatrix) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(ShouldRecord()))
    return m_recorder->MultiplyStateTransform(TransformState, pMatrix);

  const uint32_t slot = GetTransformIndex(TransformState);
  auto& current = m_state.transforms[slot];

  current = ConvertMatrix(pMatrix) * current;

  m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);

  return D3D_OK;
}
// Stores a new viewport and marks all viewport-dependent state dirty.
// A null pointer yields D3DERR_INVALIDCALL; setting an identical
// viewport is a no-op. Recorded into the state block when recording.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetViewport(const D3DVIEWPORT9* pViewport) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(pViewport == nullptr))
    return D3DERR_INVALIDCALL;

  if (unlikely(ShouldRecord()))
    return m_recorder->SetViewport(pViewport);

  if (m_state.viewport == *pViewport)
    return D3D_OK;

  // The viewport feeds the scissor/viewport state, the fixed-function
  // viewport data and the point scale computation.
  m_flags.set(D3D9DeviceFlag::DirtyPointScale);
  m_flags.set(D3D9DeviceFlag::DirtyFFViewport);
  m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);

  m_state.viewport = *pViewport;

  return D3D_OK;
}
// Copies the current viewport into *pViewport.
// Returns D3DERR_INVALIDCALL if pViewport is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetViewport(D3DVIEWPORT9* pViewport) {
  D3D9DeviceLock lock = LockDevice();

  if (!pViewport)
    return D3DERR_INVALIDCALL;

  *pViewport = m_state.viewport;

  return D3D_OK;
}
// Stores the fixed-function material and flags the fixed-function
// vertex data as dirty. Forwarded to the recorder while recording.
// Returns D3DERR_INVALIDCALL if pMaterial is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetMaterial(const D3DMATERIAL9* pMaterial) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(!pMaterial))
    return D3DERR_INVALIDCALL;

  if (unlikely(ShouldRecord()))
    return m_recorder->SetMaterial(pMaterial);

  m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
  m_state.material = *pMaterial;

  return D3D_OK;
}
// Copies the current fixed-function material into *pMaterial.
// Returns D3DERR_INVALIDCALL if pMaterial is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetMaterial(D3DMATERIAL9* pMaterial) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(!pMaterial))
    return D3DERR_INVALIDCALL;

  *pMaterial = m_state.material;

  return D3D_OK;
}
// Stores the light description at the given index, growing the light
// list as needed. Vertex data is only flagged dirty when that light is
// currently enabled. Recording into state blocks is not implemented:
// a warning is logged and D3D_OK is returned without changing state.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetLight(DWORD Index, const D3DLIGHT9* pLight) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(!pLight))
    return D3DERR_INVALIDCALL;

  if (unlikely(ShouldRecord())) {
    Logger::warn("D3D9DeviceEx::SetLight: State block not implemented.");
    return D3D_OK;
  }

  auto& lights = m_state.lights;

  if (lights.size() <= Index)
    lights.resize(Index + 1);

  lights[Index] = *pLight;

  if (m_state.IsLightEnabled(Index))
    m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);

  return D3D_OK;
}
// Copies the light stored at Index into *pLight. Fails with
// D3DERR_INVALIDCALL for a null pointer, an index beyond the light
// list, or a slot that has never been assigned a light.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLight(DWORD Index, D3DLIGHT9* pLight) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(!pLight))
    return D3DERR_INVALIDCALL;

  const auto& lights = m_state.lights;

  const bool missing = Index >= lights.size() || !lights[Index];

  if (unlikely(missing))
    return D3DERR_INVALIDCALL;

  *pLight = lights[Index].value();

  return D3D_OK;
}
// Enables or disables the light at Index. Lights that were never set are
// created from DefaultLight first. The enabled-light table stores light
// indices, with UINT32_MAX marking a free entry: enabling replaces the
// first free entry with Index, disabling replaces the entry holding Index
// with UINT32_MAX. If no matching entry exists nothing changes.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::LightEnable(DWORD Index, BOOL Enable) {
  D3D9DeviceLock lock = LockDevice();

  auto& lights = m_state.lights;

  if (unlikely(lights.size() <= Index))
    lights.resize(Index + 1);

  if (unlikely(!lights[Index]))
    lights[Index] = DefaultLight;

  const bool wantEnabled = !!Enable;

  if (m_state.IsLightEnabled(Index) == wantEnabled)
    return D3D_OK;

  // Entry value to look for, and the value that will replace it.
  const uint32_t lookFor = wantEnabled ? UINT32_MAX       : uint32_t(Index);
  const uint32_t storeAs = wantEnabled ? uint32_t(Index)  : UINT32_MAX;

  auto& slots = m_state.enabledLightIndices;
  auto  slot  = std::find(std::begin(slots), std::end(slots), lookFor);

  if (slot != std::end(slots)) {
    *slot = storeAs;

    m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
    m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
  }

  return D3D_OK;
}
// Reports whether the light at Index is enabled. An enabled light is
// reported as 128 rather than TRUE, which is a deliberate quirk kept
// from the original implementation. Fails with D3DERR_INVALIDCALL for
// a null pointer or a light that has never been set.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetLightEnable(DWORD Index, BOOL* pEnable) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(!pEnable))
    return D3DERR_INVALIDCALL;

  const bool missing = Index >= m_state.lights.size() || !m_state.lights[Index];

  if (unlikely(missing))
    return D3DERR_INVALIDCALL;

  if (m_state.IsLightEnabled(Index))
    *pEnable = 128; // Weird quirk but OK.
  else
    *pEnable = 0;

  return D3D_OK;
}
// Stores the four coefficients of a user clip plane. The clip plane
// state is only flagged dirty when a coefficient actually changed and
// the plane is enabled through D3DRS_CLIPPLANEENABLE.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetClipPlane(DWORD Index, const float* pPlane) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(!pPlane || Index >= caps::MaxClipPlanes))
    return D3DERR_INVALIDCALL;

  if (unlikely(ShouldRecord()))
    return m_recorder->SetClipPlane(Index, pPlane);

  auto& plane = m_state.clipPlanes[Index];

  bool coefficientsChanged = false;

  for (uint32_t c = 0; c < 4; c++) {
    if (plane.coeff[c] != pPlane[c])
      coefficientsChanged = true;

    // Always store the incoming value, even if it compared equal.
    plane.coeff[c] = pPlane[c];
  }

  const bool planeEnabled = m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1u << Index);

  if (coefficientsChanged && planeEnabled)
    m_flags.set(D3D9DeviceFlag::DirtyClipPlanes);

  return D3D_OK;
}
// Writes the four stored coefficients of the given clip plane to pPlane.
// Returns D3DERR_INVALIDCALL for a null pointer or out-of-range index.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetClipPlane(DWORD Index, float* pPlane) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(!pPlane || Index >= caps::MaxClipPlanes))
    return D3DERR_INVALIDCALL;

  const auto& plane = m_state.clipPlanes[Index];

  std::copy_n(&plane.coeff[0], 4, pPlane);

  return D3D_OK;
}
// Sets a render state and marks the dependent pipeline state dirty.
//
// State: render state to change. Values that are out of the accepted
//        range are silently ignored and D3D_OK is returned.
// Value: new value. Certain magic values written to D3DRS_POINTSIZE and
//        D3DRS_ADAPTIVETESS_Y are vendor "driver hacks" (alpha-to-coverage,
//        RESZ, ...) and are consumed here without being stored.
//
// While a state block is being recorded the call is forwarded to the
// recorder. Always returns D3D_OK when handled directly.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) {
  D3D9DeviceLock lock = LockDevice();

  // D3D9 only allows reading for values 0 and 7-255 so we don't need to do anything but return OK
  if (unlikely(State > 255 || (State < D3DRS_ZENABLE && State != 0))) {
    return D3D_OK;
  }

  if (unlikely(ShouldRecord()))
    return m_recorder->SetRenderState(State, Value);

  auto& states = m_state.renderStates;

  bool changed = states[State] != Value;

  if (likely(changed)) {
    // Snapshot derived state before the change so that we
    // can tell afterwards which of it has actually flipped.
    const bool oldDepthBiasEnabled = IsDepthBiasEnabled();
    const bool oldATOC             = IsAlphaToCoverageEnabled();
    const bool oldNVDB             = states[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB);
    const bool oldAlphaTest        = IsAlphaTestEnabled();

    // Re-evaluates alpha-to-coverage and alpha test after a change and
    // dirties the matching state. Shared by every path that can affect
    // either of them so that the two checks cannot get out of sync.
    auto UpdateAlphaStates = [&] {
      bool newATOC      = IsAlphaToCoverageEnabled();
      bool newAlphaTest = IsAlphaTestEnabled();

      if (oldATOC != newATOC)
        m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);

      if (oldAlphaTest != newAlphaTest)
        m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);
    };

    // AMD's driver hack for ATOC and RESZ
    if (unlikely(State == D3DRS_POINTSIZE)) {
      // ATOC
      constexpr uint32_t AlphaToCoverageEnable  = uint32_t(D3D9Format::A2M1);
      constexpr uint32_t AlphaToCoverageDisable = uint32_t(D3D9Format::A2M0);

      if (Value == AlphaToCoverageEnable
       || Value == AlphaToCoverageDisable) {
        m_amdATOC = Value == AlphaToCoverageEnable;

        UpdateAlphaStates();

        return D3D_OK;
      }

      // RESZ
      constexpr uint32_t RESZ = 0x7fa05000;
      if (Value == RESZ) {
        ResolveZ();
        return D3D_OK;
      }
    }

    // NV's driver hack for ATOC.
    if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
      constexpr uint32_t AlphaToCoverageEnable  = uint32_t(D3D9Format::ATOC);
      constexpr uint32_t AlphaToCoverageDisable = 0;

      if (Value == AlphaToCoverageEnable
       || Value == AlphaToCoverageDisable) {
        m_nvATOC = Value == AlphaToCoverageEnable;

        // The alpha test check must use IsAlphaTestEnabled here, exactly
        // like the AMD path above; comparing the alpha test snapshot
        // against the alpha-to-coverage state gives the wrong dirty flag.
        UpdateAlphaStates();

        return D3D_OK;
      }

      if (Value == uint32_t(D3D9Format::COPM)) {
        // UE3 calls this MinimalNVIDIADriverShaderOptimization
        Logger::info("D3D9DeviceEx::SetRenderState: MinimalNVIDIADriverShaderOptimization is unsupported");
        return D3D_OK;
      }
    }

    states[State] = Value;

    switch (State) {
      case D3DRS_SEPARATEALPHABLENDENABLE:
      case D3DRS_ALPHABLENDENABLE:
      case D3DRS_BLENDOP:
      case D3DRS_BLENDOPALPHA:
      case D3DRS_DESTBLEND:
      case D3DRS_DESTBLENDALPHA:
      case D3DRS_SRCBLEND:
      case D3DRS_SRCBLENDALPHA:
        m_flags.set(D3D9DeviceFlag::DirtyBlendState);
        break;

      case D3DRS_COLORWRITEENABLE:
        UpdateActiveRTs(0);
        m_flags.set(D3D9DeviceFlag::DirtyBlendState);
        break;
      case D3DRS_COLORWRITEENABLE1:
        UpdateActiveRTs(1);
        m_flags.set(D3D9DeviceFlag::DirtyBlendState);
        break;
      case D3DRS_COLORWRITEENABLE2:
        UpdateActiveRTs(2);
        m_flags.set(D3D9DeviceFlag::DirtyBlendState);
        break;
      case D3DRS_COLORWRITEENABLE3:
        UpdateActiveRTs(3);
        m_flags.set(D3D9DeviceFlag::DirtyBlendState);
        break;

      case D3DRS_ALPHATESTENABLE:
        UpdateAlphaStates();
        break;

      case D3DRS_ALPHAFUNC:
        m_flags.set(D3D9DeviceFlag::DirtyAlphaTestState);
        break;

      case D3DRS_BLENDFACTOR:
        BindBlendFactor();
        break;

      case D3DRS_MULTISAMPLEMASK:
        if (m_flags.test(D3D9DeviceFlag::ValidSampleMask))
          m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
        break;

      case D3DRS_ZENABLE:
      case D3DRS_ZFUNC:
      case D3DRS_TWOSIDEDSTENCILMODE:
      case D3DRS_ZWRITEENABLE:
      case D3DRS_STENCILENABLE:
      case D3DRS_STENCILFAIL:
      case D3DRS_STENCILZFAIL:
      case D3DRS_STENCILPASS:
      case D3DRS_STENCILFUNC:
      case D3DRS_CCW_STENCILFAIL:
      case D3DRS_CCW_STENCILZFAIL:
      case D3DRS_CCW_STENCILPASS:
      case D3DRS_CCW_STENCILFUNC:
      case D3DRS_STENCILMASK:
      case D3DRS_STENCILWRITEMASK:
        m_flags.set(D3D9DeviceFlag::DirtyDepthStencilState);
        break;

      case D3DRS_STENCILREF:
        BindDepthStencilRefrence();
        break;

      case D3DRS_SCISSORTESTENABLE:
        m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
        break;

      case D3DRS_SRGBWRITEENABLE:
        m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
        break;

      case D3DRS_DEPTHBIAS:
      case D3DRS_SLOPESCALEDEPTHBIAS: {
        const bool depthBiasEnabled = IsDepthBiasEnabled();

        if (depthBiasEnabled != oldDepthBiasEnabled)
          m_flags.set(D3D9DeviceFlag::DirtyRasterizerState);

        if (depthBiasEnabled)
          m_flags.set(D3D9DeviceFlag::DirtyDepthBias);

        break;
      }
      case D3DRS_CULLMODE:
      case D3DRS_FILLMODE:
        m_flags.set(D3D9DeviceFlag::DirtyRasterizerState);
        break;

      case D3DRS_CLIPPLANEENABLE:
        m_flags.set(D3D9DeviceFlag::DirtyClipPlanes);
        break;

      case D3DRS_ALPHAREF:
        UpdatePushConstant<D3D9RenderStateItem::AlphaRef>();
        break;

      case D3DRS_TEXTUREFACTOR:
        m_flags.set(D3D9DeviceFlag::DirtyFFPixelData);
        break;

      case D3DRS_DIFFUSEMATERIALSOURCE:
      case D3DRS_AMBIENTMATERIALSOURCE:
      case D3DRS_SPECULARMATERIALSOURCE:
      case D3DRS_EMISSIVEMATERIALSOURCE:
      case D3DRS_COLORVERTEX:
      case D3DRS_LIGHTING:
      case D3DRS_NORMALIZENORMALS:
      case D3DRS_LOCALVIEWER:
        m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
        break;

      case D3DRS_AMBIENT:
        m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
        break;

      case D3DRS_SPECULARENABLE:
        m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
        break;

      case D3DRS_FOGENABLE:
      case D3DRS_FOGVERTEXMODE:
      case D3DRS_FOGTABLEMODE:
        m_flags.set(D3D9DeviceFlag::DirtyFogState);
        break;

      case D3DRS_RANGEFOGENABLE:
        m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
        break;

      case D3DRS_FOGCOLOR:
        m_flags.set(D3D9DeviceFlag::DirtyFogColor);
        break;

      case D3DRS_FOGSTART:
        m_flags.set(D3D9DeviceFlag::DirtyFogScale);
        break;

      case D3DRS_FOGEND:
        m_flags.set(D3D9DeviceFlag::DirtyFogScale);
        m_flags.set(D3D9DeviceFlag::DirtyFogEnd);
        break;

      case D3DRS_FOGDENSITY:
        m_flags.set(D3D9DeviceFlag::DirtyFogDensity);
        break;

      case D3DRS_POINTSIZE:
        UpdatePushConstant<D3D9RenderStateItem::PointSize>();
        break;

      case D3DRS_POINTSIZE_MIN:
        UpdatePushConstant<D3D9RenderStateItem::PointSizeMin>();
        break;

      case D3DRS_POINTSIZE_MAX:
        UpdatePushConstant<D3D9RenderStateItem::PointSizeMax>();
        break;

      case D3DRS_POINTSCALE_A:
      case D3DRS_POINTSCALE_B:
      case D3DRS_POINTSCALE_C:
        m_flags.set(D3D9DeviceFlag::DirtyPointScale);
        break;

      case D3DRS_POINTSCALEENABLE:
      case D3DRS_POINTSPRITEENABLE:
        // Nothing to do here!
        // This is handled in UpdatePointMode.
        break;

      case D3DRS_SHADEMODE:
        if (m_state.pixelShader != nullptr) {
          BindShader<DxsoProgramType::PixelShader>(
            GetCommonShader(m_state.pixelShader),
            GetPixelShaderPermutation());
        }

        m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
        break;

      case D3DRS_TWEENFACTOR:
        m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
        break;

      case D3DRS_VERTEXBLEND:
        m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
        break;

      case D3DRS_INDEXEDVERTEXBLENDENABLE:
        if (CanSWVP() && Value)
          m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend);

        m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
        break;

      case D3DRS_ADAPTIVETESS_X:
      case D3DRS_ADAPTIVETESS_Z:
      case D3DRS_ADAPTIVETESS_W:
        // These states only matter when they are (or were) used for the
        // NVDB depth bounds hack.
        if (states[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB) || oldNVDB) {
          m_flags.set(D3D9DeviceFlag::DirtyDepthBounds);
          break;
        }
        // Intentional fall-through: otherwise report them as unhandled.

      default:
        static bool s_errorShown[256];

        if (!std::exchange(s_errorShown[State], true))
          Logger::warn(str::format("D3D9DeviceEx::SetRenderState: Unhandled render state ", State));
        break;
    }
  }

  return D3D_OK;
}
// Reads back a render state value. Fails with D3DERR_INVALIDCALL for a
// null pointer or a state outside the accepted range. States outside
// [D3DRS_ZENABLE, D3DRS_BLENDOPALPHA] that pass validation read as 0.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetRenderState(D3DRENDERSTATETYPE State, DWORD* pValue) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(!pValue))
    return D3DERR_INVALIDCALL;

  const bool belowFirstState = State < D3DRS_ZENABLE;

  if (unlikely(State > 255 || (belowFirstState && State != 0)))
    return D3DERR_INVALIDCALL;

  const bool hasStoredValue = !belowFirstState && State <= D3DRS_BLENDOPALPHA;

  if (hasStoredValue)
    *pValue = m_state.renderStates[State];
  else
    *pValue = 0;

  return D3D_OK;
}
// Creates a state block of the requested type and returns a reference to
// it through ppSB. A DxvkError thrown during construction is logged and
// reported as D3DERR_INVALIDCALL, as is a null output pointer.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateStateBlock(
        D3DSTATEBLOCKTYPE      Type,
        IDirect3DStateBlock9** ppSB) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(ppSB);

  if (unlikely(!ppSB))
    return D3DERR_INVALIDCALL;

  HRESULT result = D3D_OK;

  try {
    const Com<D3D9StateBlock> stateBlock = new D3D9StateBlock(this, ConvertStateBlockType(Type));
    *ppSB = stateBlock.ref();
  }
  catch (const DxvkError& error) {
    Logger::err(error.message());
    result = D3DERR_INVALIDCALL;
  }

  return result;
}
// Starts recording state changes into a fresh state block.
// Fails with D3DERR_INVALIDCALL if a recording is already in progress.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::BeginStateBlock() {
  D3D9DeviceLock lock = LockDevice();

  const bool alreadyRecording = m_recorder != nullptr;

  if (unlikely(alreadyRecording))
    return D3DERR_INVALIDCALL;

  m_recorder = new D3D9StateBlock(this, D3D9StateBlockType::None);

  return D3D_OK;
}
// Stops recording and hands the recorded state block to the caller.
// Fails with D3DERR_INVALIDCALL for a null output pointer or when no
// recording is in progress.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::EndStateBlock(IDirect3DStateBlock9** ppSB) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(ppSB);

  if (unlikely(!ppSB || m_recorder == nullptr))
    return D3DERR_INVALIDCALL;

  // Give the caller its own reference, then drop ours.
  *ppSB      = m_recorder.ref();
  m_recorder = nullptr;

  return D3D_OK;
}
// Stub: the clip status is not stored. Logs a warning on every call
// and reports success; pClipStatus is never read.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetClipStatus(const D3DCLIPSTATUS9* pClipStatus) {
Logger::warn("D3D9DeviceEx::SetClipStatus: Stub");
return D3D_OK;
}
// Stub: logs a warning on every call and reports success.
// Note that *pClipStatus is left untouched by this implementation.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetClipStatus(D3DCLIPSTATUS9* pClipStatus) {
Logger::warn("D3D9DeviceEx::GetClipStatus: Stub");
return D3D_OK;
}
// Returns a new reference to the texture bound to the given stage.
// A null output pointer yields D3DERR_INVALIDCALL. An invalid stage
// succeeds but leaves *ppTexture set to null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTexture(DWORD Stage, IDirect3DBaseTexture9** ppTexture) {
  D3D9DeviceLock lock = LockDevice();

  if (!ppTexture)
    return D3DERR_INVALIDCALL;

  *ppTexture = nullptr;

  if (likely(!InvalidSampler(Stage))) {
    // Map the API stage number to the internal sampler slot.
    const DWORD slot = RemapSamplerState(Stage);
    *ppTexture = ref(m_state.textures[slot]);
  }

  return D3D_OK;
}
// Binds a texture to the given stage. Invalid stages are ignored and
// reported as success; otherwise the stage is remapped to the internal
// sampler slot and the work is delegated to SetStateTexture.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTexture(DWORD Stage, IDirect3DBaseTexture9* pTexture) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(InvalidSampler(Stage)))
    return D3D_OK;

  const DWORD slot = RemapSamplerState(Stage);

  return SetStateTexture(slot, pTexture);
}
// Reads back a texture stage state value.
//
// Stage:  texture stage, must be below caps::TextureStageCount.
// Type:   stage state to read, must be below TextureStageStateCount.
// pValue: receives the value; set to 0 when the stage or type is invalid.
//
// Returns D3DERR_INVALIDCALL for a null pointer or out-of-range
// stage/type, D3D_OK otherwise.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetTextureStageState(
        DWORD                    Stage,
        D3DTEXTURESTAGESTATETYPE Type,
        DWORD*                   pValue) {
  // Take the device lock like SetTextureStageState and the other state
  // getters do, since this reads state that the setter modifies.
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(pValue == nullptr))
    return D3DERR_INVALIDCALL;

  *pValue = 0;

  if (unlikely(Stage >= caps::TextureStageCount))
    return D3DERR_INVALIDCALL;

  if (unlikely(Type >= TextureStageStateCount))
    return D3DERR_INVALIDCALL;

  *pValue = m_state.textureStages[Stage][Type];

  return D3D_OK;
}
// Sets a texture stage state and dirties whatever depends on it:
//  - bump environment states dirty the shared pixel shader data,
//  - texture transform flags dirty both fixed-function shaders and
//    update the per-stage projection bit,
//  - the texcoord index dirties the fixed-function vertex shader,
//  - everything else dirties the fixed-function pixel shader.
// Setting an unchanged value does nothing.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetTextureStageState(
        DWORD                    Stage,
        D3DTEXTURESTAGESTATETYPE Type,
        DWORD                    Value) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(Stage >= caps::TextureStageCount))
    return D3DERR_INVALIDCALL;

  if (unlikely(Type >= TextureStageStateCount))
    return D3DERR_INVALIDCALL;

  if (unlikely(ShouldRecord()))
    return m_recorder->SetTextureStageState(Stage, Type, Value);

  auto& stored = m_state.textureStages[Stage][Type];

  if (unlikely(stored == Value))
    return D3D_OK;

  const bool isTransformFlags = Type == D3DTSS_TEXTURETRANSFORMFLAGS;

  const bool isBumpEnv = (Type >= D3DTSS_BUMPENVMAT00 && Type <= D3DTSS_BUMPENVMAT11)
                      || (Type == D3DTSS_BUMPENVLSCALE || Type == D3DTSS_BUMPENVLOFFSET);

  if (isTransformFlags) {
    // Track which stages use projected texture coordinates.
    if (Value & D3DTTFF_PROJECTED)
      m_projectionBitfield |= 1 << Stage;
    else
      m_projectionBitfield &= ~(1 << Stage);
  }

  if (isBumpEnv) {
    m_flags.set(D3D9DeviceFlag::DirtySharedPixelShaderData);
  }
  else if (isTransformFlags) {
    // This state affects both!
    m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
    m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
  }
  else if (Type == D3DTSS_TEXCOORDINDEX) {
    m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
  }
  else {
    m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
  }

  stored = Value;

  return D3D_OK;
}
// Reads back a sampler state value. A null pointer yields
// D3DERR_INVALIDCALL; an invalid sampler succeeds and reports 0.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetSamplerState(
        DWORD               Sampler,
        D3DSAMPLERSTATETYPE Type,
        DWORD*              pValue) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(!pValue))
    return D3DERR_INVALIDCALL;

  *pValue = 0;

  if (likely(!InvalidSampler(Sampler))) {
    // Map the API sampler number to the internal sampler slot.
    const DWORD slot = RemapSamplerState(Sampler);
    *pValue = m_state.samplerStates[slot][Type];
  }

  return D3D_OK;
}
// Sets a sampler state. Invalid samplers are ignored and reported as
// success; otherwise the sampler is remapped to its internal slot and
// the work is delegated to SetStateSamplerState.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetSamplerState(
        DWORD               Sampler,
        D3DSAMPLERSTATETYPE Type,
        DWORD               Value) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(InvalidSampler(Sampler)))
    return D3D_OK;

  const uint32_t slot = RemapSamplerState(Sampler);

  return SetStateSamplerState(slot, Type, Value);
}
// Always reports the current state as renderable in a single pass.
// pNumPasses is optional and is only written when non-null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ValidateDevice(DWORD* pNumPasses) {
  if (pNumPasses)
    *pNumPasses = 1;

  return D3D_OK;
}
// No-op: the palette entries are ignored and nothing is stored.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPaletteEntries(UINT PaletteNumber, const PALETTEENTRY* pEntries) {
// This succeeds even though we don't advertise support.
return D3D_OK;
}
// Palettes are not supported: always fails and never writes to pEntries.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPaletteEntries(UINT PaletteNumber, PALETTEENTRY* pEntries) {
// Don't advertise support for this...
return D3DERR_INVALIDCALL;
}
// No-op: the palette number is ignored and nothing is stored.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetCurrentTexturePalette(UINT PaletteNumber) {
// This succeeds even though we don't advertise support.
return D3D_OK;
}
// Palettes are not supported: always fails and never writes to PaletteNumber.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetCurrentTexturePalette(UINT *PaletteNumber) {
// Don't advertise support for this...
return D3DERR_INVALIDCALL;
}
// Stores a new scissor rectangle and dirties the viewport/scissor state.
// A null pointer yields D3DERR_INVALIDCALL; setting an identical rect is
// a no-op. Forwarded to the recorder while a state block is recording.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetScissorRect(const RECT* pRect) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(!pRect))
    return D3DERR_INVALIDCALL;

  if (unlikely(ShouldRecord()))
    return m_recorder->SetScissorRect(pRect);

  if (m_state.scissorRect == *pRect)
    return D3D_OK;

  m_flags.set(D3D9DeviceFlag::DirtyViewportScissor);
  m_state.scissorRect = *pRect;

  return D3D_OK;
}
// Copies the current scissor rectangle into *pRect.
// Returns D3DERR_INVALIDCALL if pRect is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetScissorRect(RECT* pRect) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(!pRect))
    return D3DERR_INVALIDCALL;

  *pRect = m_state.scissorRect;

  return D3D_OK;
}
// Switches software vertex processing on or off. Turning it on is
// rejected with D3DERR_INVALIDCALL when CanSWVP() reports that the
// device cannot do software vertex processing.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetSoftwareVertexProcessing(BOOL bSoftware) {
  D3D9DeviceLock lock = LockDevice();

  const bool rejected = bSoftware && !CanSWVP();

  if (rejected)
    return D3DERR_INVALIDCALL;

  m_isSWVP = bSoftware;

  return D3D_OK;
}
// Reports whether software vertex processing is currently selected.
BOOL STDMETHODCALLTYPE D3D9DeviceEx::GetSoftwareVertexProcessing() {
  D3D9DeviceLock lock = LockDevice();

  return m_isSWVP;
}
// No-op: the segment count is ignored and the call always succeeds.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetNPatchMode(float nSegments) {
return D3D_OK;
}
// Always reports 0.0f; no N-patch mode is stored by SetNPatchMode.
float STDMETHODCALLTYPE D3D9DeviceEx::GetNPatchMode() {
return 0.0f;
}
// Records a non-indexed draw. Device state is prepared on the calling
// thread; the draw itself is emitted as a CS command that derives the
// vertex/instance counts from the primitive type and count.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawPrimitive(
        D3DPRIMITIVETYPE PrimitiveType,
        UINT             StartVertex,
        UINT             PrimitiveCount) {
  D3D9DeviceLock lock = LockDevice();

  PrepareDraw(PrimitiveType);

  // Sample the instance count now, after state has been prepared.
  const auto instances = GetInstanceCount();

  EmitCs([this,
    cType        = PrimitiveType,
    cPrimitives  = PrimitiveCount,
    cFirstVertex = StartVertex,
    cInstances   = instances
  ](DxvkContext* ctx) {
    const auto info = GenerateDrawInfo(cType, cPrimitives, cInstances);

    ApplyPrimitiveType(ctx, cType);

    ctx->draw(
      info.vertexCount, info.instanceCount,
      cFirstVertex, 0);
  });

  return D3D_OK;
}
// Records an indexed draw. MinVertexIndex and NumVertices are accepted
// for API compatibility but are not used by this implementation.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawIndexedPrimitive(
        D3DPRIMITIVETYPE PrimitiveType,
        INT              BaseVertexIndex,
        UINT             MinVertexIndex,
        UINT             NumVertices,
        UINT             StartIndex,
        UINT             PrimitiveCount) {
  D3D9DeviceLock lock = LockDevice();

  PrepareDraw(PrimitiveType);

  // Sample the instance count now, after state has been prepared.
  const auto instances = GetInstanceCount();

  EmitCs([this,
    cType       = PrimitiveType,
    cPrimitives = PrimitiveCount,
    cFirstIndex = StartIndex,
    cBaseVertex = BaseVertexIndex,
    cInstances  = instances
  ](DxvkContext* ctx) {
    const auto info = GenerateDrawInfo(cType, cPrimitives, cInstances);

    ApplyPrimitiveType(ctx, cType);

    ctx->drawIndexed(
      info.vertexCount, info.instanceCount,
      cFirstIndex,
      cBaseVertex, 0);
  });

  return D3D_OK;
}
// Records a non-indexed draw from user memory. The vertex data is copied
// into a freshly allocated "UP" buffer slice which is then bound as
// vertex stream 0 for the draw. Afterwards the device is flagged so that
// the overwritten vertex binding is known to be dirty.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawPrimitiveUP(
        D3DPRIMITIVETYPE PrimitiveType,
        UINT             PrimitiveCount,
  const void*            pVertexStreamZeroData,
        UINT             VertexStreamZeroStride) {
  D3D9DeviceLock lock = LockDevice();

  PrepareDraw(PrimitiveType, true);

  // Only the vertex count matters here, it determines the upload size.
  const auto     sizeInfo    = GenerateDrawInfo(PrimitiveType, PrimitiveCount, 0);
  const uint32_t uploadBytes = sizeInfo.vertexCount * VertexStreamZeroStride;

  auto upload = AllocUpBuffer(uploadBytes);
  std::memcpy(upload.mapPtr, pVertexStreamZeroData, uploadBytes);

  EmitCs([this,
    cVertices   = std::move(upload.slice),
    cType       = PrimitiveType,
    cPrimitives = PrimitiveCount,
    cInstances  = GetInstanceCount(),
    cStride     = VertexStreamZeroStride
  ](DxvkContext* ctx) {
    const auto info = GenerateDrawInfo(cType, cPrimitives, cInstances);

    ApplyPrimitiveType(ctx, cType);

    ctx->bindVertexBuffer(0, cVertices, cStride);
    ctx->draw(
      info.vertexCount, info.instanceCount,
      0, 0);
  });

  m_flags.set(D3D9DeviceFlag::UpDirtiedVertices);

  return D3D_OK;
}
// Records an indexed draw from user memory. Vertex and index data are
// copied back to back into one "UP" buffer slice: vertices first
// (covering vertices 0 .. MinVertexIndex + NumVertices - 1), followed by
// the indices. The two halves are then bound as vertex stream 0 and as
// the index buffer. Afterwards both bindings are flagged as dirtied.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawIndexedPrimitiveUP(
        D3DPRIMITIVETYPE PrimitiveType,
        UINT             MinVertexIndex,
        UINT             NumVertices,
        UINT             PrimitiveCount,
  const void*            pIndexData,
        D3DFORMAT        IndexDataFormat,
  const void*            pVertexStreamZeroData,
        UINT             VertexStreamZeroStride) {
  D3D9DeviceLock lock = LockDevice();

  PrepareDraw(PrimitiveType, true);

  // Only the vertex (i.e. index) count matters here, for sizing the upload.
  const auto sizeInfo = GenerateDrawInfo(PrimitiveType, PrimitiveCount, 0);

  const uint32_t bytesPerIndex = IndexDataFormat == D3DFMT_INDEX16 ? 2 : 4;
  const uint32_t vertexBytes   = (MinVertexIndex + NumVertices) * VertexStreamZeroStride;
  const uint32_t indexBytes    = sizeInfo.vertexCount * bytesPerIndex;

  auto upload = AllocUpBuffer(vertexBytes + indexBytes);

  auto* dst = reinterpret_cast<uint8_t*>(upload.mapPtr);
  std::memcpy(dst,               pVertexStreamZeroData, vertexBytes);
  std::memcpy(dst + vertexBytes, pIndexData,            indexBytes);

  EmitCs([this,
    cVertexBytes = vertexBytes,
    cUpload      = std::move(upload.slice),
    cType        = PrimitiveType,
    cPrimitives  = PrimitiveCount,
    cStride      = VertexStreamZeroStride,
    cInstances   = GetInstanceCount(),
    cIndexType   = DecodeIndexType(
                     static_cast<D3D9Format>(IndexDataFormat))
  ](DxvkContext* ctx) {
    const auto info = GenerateDrawInfo(cType, cPrimitives, cInstances);

    ApplyPrimitiveType(ctx, cType);

    // First part of the slice holds vertices, the remainder holds indices.
    ctx->bindVertexBuffer(0, cUpload.subSlice(0, cVertexBytes), cStride);
    ctx->bindIndexBuffer(cUpload.subSlice(cVertexBytes, cUpload.length() - cVertexBytes), cIndexType);
    ctx->drawIndexed(
      info.vertexCount, info.instanceCount,
      0,
      0, 0);
  });

  m_flags.set(D3D9DeviceFlag::UpDirtiedVertices);
  m_flags.set(D3D9DeviceFlag::UpDirtiedIndices);

  return D3D_OK;
}
// Runs the current vertex pipeline over a range of source vertices and
// writes the results into pDestBuffer (software vertex processing
// emulation through a geometry shader provided by m_swvpEmulator).
//
// SrcStartIndex: first source vertex to process.
// DestIndex:     first vertex in the destination buffer to write.
// VertexCount:   number of vertices to process.
// pDestBuffer:   destination vertex buffer, must not be null.
// pVertexDecl:   output vertex declaration. May be null, in which case a
//                declaration is derived from the destination buffer's FVF.
// Flags:         not used by this implementation.
//
// Returns D3DERR_INVALIDCALL if pDestBuffer is null. If SWVP emulation is
// unsupported an error is logged once and D3D_OK is returned without
// doing any work.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ProcessVertices(
        UINT                         SrcStartIndex,
        UINT                         DestIndex,
        UINT                         VertexCount,
        IDirect3DVertexBuffer9*      pDestBuffer,
        IDirect3DVertexDeclaration9* pVertexDecl,
        DWORD                        Flags) {
  D3D9DeviceLock lock = LockDevice();

  // Only the destination buffer is mandatory. A null declaration must not
  // be rejected here, otherwise the FVF fallback below can never run.
  if (unlikely(pDestBuffer == nullptr))
    return D3DERR_INVALIDCALL;

  if (!SupportsSWVP()) {
    static bool s_errorShown = false;

    if (!std::exchange(s_errorShown, true))
      Logger::err("D3D9DeviceEx::ProcessVertices: SWVP emu unsupported (vertexPipelineStoresAndAtomics)");

    return D3D_OK;
  }

  D3D9CommonBuffer* dst  = static_cast<D3D9VertexBuffer*>(pDestBuffer)->GetCommonBuffer();
  D3D9VertexDecl*   decl = static_cast<D3D9VertexDecl*>  (pVertexDecl);

  PrepareDraw(D3DPT_FORCE_DWORD, false);

  // No declaration given: look up (or create and cache) the declaration
  // that matches the FVF of the destination buffer.
  if (decl == nullptr) {
    DWORD FVF = dst->Desc()->FVF;

    auto iter = m_fvfTable.find(FVF);

    if (iter == m_fvfTable.end()) {
      decl = new D3D9VertexDecl(this, FVF);
      m_fvfTable.insert(std::make_pair(FVF, decl));
    }
    else
      decl = iter->second.ptr();
  }

  // Byte offset of the first destination vertex.
  uint32_t offset = DestIndex * decl->GetSize();

  auto slice = dst->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>();
       slice = slice.subSlice(offset, slice.length() - offset);

  EmitCs([this,
    cDecl          = ref(decl),
    cVertexCount   = VertexCount,
    cStartIndex    = SrcStartIndex,
    cInstanceCount = GetInstanceCount(),
    cBufferSlice   = slice
  ](DxvkContext* ctx) {
    Rc<DxvkShader> shader = m_swvpEmulator.GetShaderModule(this, cDecl);

    auto drawInfo = GenerateDrawInfo(D3DPT_POINTLIST, cVertexCount, cInstanceCount);

    if (drawInfo.instanceCount != 1) {
      drawInfo.instanceCount = 1;

      Logger::warn("D3D9DeviceEx::ProcessVertices: instancing unsupported");
    }

    ApplyPrimitiveType(ctx, D3DPT_POINTLIST);

    // Bind the emulation shader and the output buffer only for this draw.
    ctx->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT, shader);
    ctx->bindResourceBuffer(getSWVPBufferSlot(), cBufferSlice);
    ctx->draw(
      drawInfo.vertexCount, drawInfo.instanceCount,
      cStartIndex, 0);
    ctx->bindResourceBuffer(getSWVPBufferSlot(), DxvkBufferSlice());
    ctx->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT, nullptr);
  });

  // When the buffer uses a separate mapping buffer, copy the
  // processed range from the real buffer into the mapping buffer.
  if (dst->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) {
    uint32_t copySize = VertexCount * decl->GetSize();

    EmitCs([
      cSrcBuffer = dst->GetBuffer<D3D9_COMMON_BUFFER_TYPE_REAL>(),
      cDstBuffer = dst->GetBuffer<D3D9_COMMON_BUFFER_TYPE_MAPPING>(),
      cOffset    = offset,
      cCopySize  = copySize
    ](DxvkContext* ctx) {
      ctx->copyBuffer(cDstBuffer, cOffset, cSrcBuffer, cOffset, cCopySize);
    });
  }

  dst->SetReadLocked(true);

  return D3D_OK;
}
// Creates a vertex declaration from an element array that is terminated
// by an element whose Stream is 0xFF. The terminator itself is not
// counted. A DxvkError thrown during construction is logged and reported
// as D3DERR_INVALIDCALL, as are null arguments.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexDeclaration(
  const D3DVERTEXELEMENT9*            pVertexElements,
        IDirect3DVertexDeclaration9** ppDecl) {
  InitReturnPtr(ppDecl);

  if (unlikely(!ppDecl || !pVertexElements))
    return D3DERR_INVALIDCALL;

  // Count the elements in front of the end marker.
  uint32_t elementCount = 0;

  while (pVertexElements[elementCount].Stream != 0xFF)
    elementCount++;

  HRESULT result = D3D_OK;

  try {
    const Com<D3D9VertexDecl> created = new D3D9VertexDecl(this, pVertexElements, elementCount);
    *ppDecl = created.ref();
  }
  catch (const DxvkError& error) {
    Logger::err(error.message());
    result = D3DERR_INVALIDCALL;
  }

  return result;
}
// Binds a vertex declaration. The input layout is always flagged dirty
// on a change. The fixed-function vertex shader is additionally flagged
// when either the old or new declaration is null, or when the two differ
// in HasPositionT, HasColor0, HasColor1 or their texcoord mask.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexDeclaration(IDirect3DVertexDeclaration9* pDecl) {
  D3D9DeviceLock lock = LockDevice();

  auto* incoming = static_cast<D3D9VertexDecl*>(pDecl);

  if (unlikely(ShouldRecord()))
    return m_recorder->SetVertexDeclaration(incoming);

  auto& bound = m_state.vertexDecl;

  if (incoming == bound.ptr())
    return D3D_OK;

  // Compares one declaration flag between the new and the bound declaration.
  auto flagDiffers = [&](D3D9VertexDeclFlag flag) {
    return incoming->TestFlag(flag) != bound->TestFlag(flag);
  };

  bool ffShaderDirty = true;

  if (incoming != nullptr && bound != nullptr) {
    ffShaderDirty = flagDiffers(D3D9VertexDeclFlag::HasPositionT)
                 || flagDiffers(D3D9VertexDeclFlag::HasColor0)
                 || flagDiffers(D3D9VertexDeclFlag::HasColor1)
                 || incoming->GetTexcoordMask() != bound->GetTexcoordMask();
  }

  if (ffShaderDirty)
    m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);

  bound = incoming;

  m_flags.set(D3D9DeviceFlag::DirtyInputLayout);

  return D3D_OK;
}
// Returns a new reference to the bound vertex declaration, if any.
// This call always succeeds: with a null output pointer nothing is
// written, and with no declaration bound *ppDecl stays null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexDeclaration(IDirect3DVertexDeclaration9** ppDecl) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(ppDecl);

  if (ppDecl != nullptr && m_state.vertexDecl != nullptr)
    *ppDecl = m_state.vertexDecl.ref();

  return D3D_OK;
}
// Selects a vertex format by FVF code. An FVF of 0 is ignored. The
// matching vertex declaration is taken from the FVF cache, or created
// and cached on first use, and then bound via SetVertexDeclaration.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetFVF(DWORD FVF) {
  D3D9DeviceLock lock = LockDevice();

  if (FVF == 0)
    return D3D_OK;

  D3D9VertexDecl* fvfDecl = nullptr;

  auto cached = m_fvfTable.find(FVF);

  if (cached != m_fvfTable.end()) {
    fvfDecl = cached->second.ptr();
  }
  else {
    fvfDecl = new D3D9VertexDecl(this, FVF);
    m_fvfTable.insert(std::make_pair(FVF, fvfDecl));
  }

  return this->SetVertexDeclaration(fvfDecl);
}
// Writes the FVF code of the active vertex declaration to *pFVF,
// or zero when no declaration is bound.
// Returns D3DERR_INVALIDCALL if pFVF is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetFVF(DWORD* pFVF) {
  D3D9DeviceLock lock = LockDevice();

  if (pFVF == nullptr)
    return D3DERR_INVALIDCALL;

  if (m_state.vertexDecl != nullptr)
    *pFVF = m_state.vertexDecl->GetFVF();
  else
    *pFVF = 0;

  return D3D_OK;
}
// Creates a vertex shader object from the token stream in pFunction.
//
// Returns D3DERR_INVALIDCALL if ppShader is null or if the shader
// module could not be created; on success *ppShader receives a new
// reference to the shader.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateVertexShader(
  const DWORD*                   pFunction,
        IDirect3DVertexShader9** ppShader) {
  // Unlike CreatePixelShader, CreateVertexShader
  // does not initialize the return pointer.
  if (unlikely(ppShader == nullptr))
    return D3DERR_INVALIDCALL;

  DxsoModuleInfo moduleInfo;
  moduleInfo.options = m_dxsoOptions;

  D3D9CommonShader module;

  const HRESULT hr = this->CreateShaderModule(
    &module, VK_SHADER_STAGE_VERTEX_BIT, pFunction, &moduleInfo);

  // Any failure is reported to the caller as an invalid call.
  if (FAILED(hr))
    return D3DERR_INVALIDCALL;

  *ppShader = ref(new D3D9VertexShader(this, module));

  return D3D_OK;
}
// Makes pShader the active programmable vertex shader (null unbinds).
//
// While a state block is recording, the call is forwarded to the
// recorder. Re-binding the current shader is a no-op. Otherwise the
// vertex constant set is marked dirty when either shader needs
// constant copies, when there was no previous shader, or when the new
// shader reads a higher float/int/bool constant index than the old one.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShader(IDirect3DVertexShader9* pShader) {
  D3D9DeviceLock lock = LockDevice();

  D3D9VertexShader* shader = static_cast<D3D9VertexShader*>(pShader);

  if (unlikely(ShouldRecord()))
    return m_recorder->SetVertexShader(shader);

  if (shader == m_state.vertexShader.ptr())
    return D3D_OK;

  auto* oldShader = GetCommonShader(m_state.vertexShader);
  auto* newShader = GetCommonShader(shader);

  auto& constants = m_consts[DxsoProgramTypes::VertexShader];

  bool needsUpload = !oldShader
    || oldShader->GetMeta().needsConstantCopies
    || (newShader && newShader->GetMeta().needsConstantCopies);

  if (newShader && oldShader) {
    const auto& newMeta = newShader->GetMeta();
    const auto& oldMeta = oldShader->GetMeta();

    // The new shader reaches past the range the old one used.
    needsUpload = needsUpload
      || newMeta.maxConstIndexF > oldMeta.maxConstIndexF
      || newMeta.maxConstIndexI > oldMeta.maxConstIndexI
      || newMeta.maxConstIndexB > oldMeta.maxConstIndexB;
  }

  constants.dirty |= needsUpload;
  constants.meta   = newShader ? &newShader->GetMeta() : nullptr;

  m_state.vertexShader = shader;

  if (shader != nullptr) {
    m_flags.clr(D3D9DeviceFlag::DirtyProgVertexShader);
    m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);

    BindShader<DxsoProgramTypes::VertexShader>(
      GetCommonShader(shader),
      GetVertexShaderPermutation());
  }

  m_flags.set(D3D9DeviceFlag::DirtyInputLayout);

  return D3D_OK;
}
// Returns a new reference to the active vertex shader in *ppShader.
// Returns D3DERR_INVALIDCALL if ppShader is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShader(IDirect3DVertexShader9** ppShader) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(ppShader);

  if (unlikely(ppShader == nullptr))
    return D3DERR_INVALIDCALL;

  IDirect3DVertexShader9* bound = m_state.vertexShader.ref();
  *ppShader = bound;

  return D3D_OK;
}
// Writes float vertex shader constants, starting at StartRegister,
// by forwarding to SetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantF(
        UINT   StartRegister,
  const float* pConstantData,
        UINT   Vector4fCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::VertexShader;
  constexpr auto Constant = D3D9ConstantType::Float;

  return SetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, Vector4fCount);
}
// Reads float vertex shader constants, starting at StartRegister,
// by forwarding to GetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantF(
  UINT   StartRegister,
  float* pConstantData,
  UINT   Vector4fCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::VertexShader;
  constexpr auto Constant = D3D9ConstantType::Float;

  return GetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, Vector4fCount);
}
// Writes integer vertex shader constants, starting at StartRegister,
// by forwarding to SetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantI(
        UINT StartRegister,
  const int* pConstantData,
        UINT Vector4iCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::VertexShader;
  constexpr auto Constant = D3D9ConstantType::Int;

  return SetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, Vector4iCount);
}
// Reads integer vertex shader constants, starting at StartRegister,
// by forwarding to GetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantI(
  UINT StartRegister,
  int* pConstantData,
  UINT Vector4iCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::VertexShader;
  constexpr auto Constant = D3D9ConstantType::Int;

  return GetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, Vector4iCount);
}
// Writes boolean vertex shader constants, starting at StartRegister,
// by forwarding to SetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetVertexShaderConstantB(
        UINT  StartRegister,
  const BOOL* pConstantData,
        UINT  BoolCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::VertexShader;
  constexpr auto Constant = D3D9ConstantType::Bool;

  return SetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, BoolCount);
}
// Reads boolean vertex shader constants, starting at StartRegister,
// by forwarding to GetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetVertexShaderConstantB(
  UINT  StartRegister,
  BOOL* pConstantData,
  UINT  BoolCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::VertexShader;
  constexpr auto Constant = D3D9ConstantType::Bool;

  return GetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, BoolCount);
}
// Binds a vertex buffer, byte offset and stride to a vertex stream.
//
// Returns D3DERR_INVALIDCALL if StreamNumber is not below
// caps::MaxStreams. While a state block is recording, the call is
// forwarded to the recorder. The stored state is always updated, but
// BindVertexBuffer is only invoked if the buffer, offset or stride
// actually changed.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetStreamSource(
  UINT                    StreamNumber,
  IDirect3DVertexBuffer9* pStreamData,
  UINT                    OffsetInBytes,
  UINT                    Stride) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(StreamNumber >= caps::MaxStreams))
    return D3DERR_INVALIDCALL;

  D3D9VertexBuffer* buffer = static_cast<D3D9VertexBuffer*>(pStreamData);

  if (unlikely(ShouldRecord())) {
    return m_recorder->SetStreamSource(
      StreamNumber, buffer, OffsetInBytes, Stride);
  }

  auto& binding = m_state.vertexBuffers[StreamNumber];

  // Compare against the previous state before overwriting it.
  const bool bufferChanged = binding.vertexBuffer != buffer;
  const bool rangeChanged  = binding.offset != OffsetInBytes
                          || binding.stride != Stride;

  if (bufferChanged)
    binding.vertexBuffer = buffer;

  binding.offset = OffsetInBytes;
  binding.stride = Stride;

  if (bufferChanged || rangeChanged)
    BindVertexBuffer(StreamNumber, buffer, OffsetInBytes, Stride);

  return D3D_OK;
}
// Retrieves the vertex buffer, byte offset and stride bound to a stream.
//
// Every non-null output is reset first (pointer via InitReturnPtr,
// offset and stride to zero), so the outputs are well defined even on
// failure. Returns D3DERR_INVALIDCALL if any output pointer is null
// or if StreamNumber is not below caps::MaxStreams.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetStreamSource(
  UINT                     StreamNumber,
  IDirect3DVertexBuffer9** ppStreamData,
  UINT*                    pOffsetInBytes,
  UINT*                    pStride) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(ppStreamData);

  if (unlikely(pOffsetInBytes != nullptr))
    *pOffsetInBytes = 0;

  if (unlikely(pStride != nullptr))
    *pStride = 0;

  const bool missingOutput = ppStreamData   == nullptr
                          || pOffsetInBytes == nullptr
                          || pStride        == nullptr;

  if (unlikely(missingOutput || StreamNumber >= caps::MaxStreams))
    return D3DERR_INVALIDCALL;

  const auto& binding = m_state.vertexBuffers[StreamNumber];

  *ppStreamData   = binding.vertexBuffer.ref();
  *pOffsetInBytes = binding.offset;
  *pStride        = binding.stride;

  return D3D_OK;
}
// Sets the frequency (instancing) setting of a vertex stream.
//
// Returns D3DERR_INVALIDCALL when StreamNumber is not below
// caps::MaxStreams, when stream 0 is flagged as instance data, when
// both the INDEXEDDATA and INSTANCEDATA flags are set, or when
// Setting is zero. While a state block is recording, the call is
// forwarded to the recorder. On an actual change, the stream's bit
// in m_instancedData is updated and the input layout is dirtied.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetStreamSourceFreq(UINT StreamNumber, UINT Setting) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(StreamNumber >= caps::MaxStreams))
    return D3DERR_INVALIDCALL;

  const bool indexed   = Setting & D3DSTREAMSOURCE_INDEXEDDATA;
  const bool instanced = Setting & D3DSTREAMSOURCE_INSTANCEDATA;

  const bool invalidSetting = (StreamNumber == 0 && instanced)
                           || (instanced && indexed)
                           || Setting == 0;

  if (unlikely(invalidSetting))
    return D3DERR_INVALIDCALL;

  if (unlikely(ShouldRecord()))
    return m_recorder->SetStreamSourceFreq(StreamNumber, Setting);

  auto& storedSetting = m_state.streamFreq[StreamNumber];

  if (storedSetting == Setting)
    return D3D_OK;

  storedSetting = Setting;

  const auto streamBit = 1u << StreamNumber;

  if (instanced)
    m_instancedData |= streamBit;
  else
    m_instancedData &= ~streamBit;

  m_flags.set(D3D9DeviceFlag::DirtyInputLayout);

  return D3D_OK;
}
// Writes the stored frequency setting of a vertex stream to *pSetting.
// Returns D3DERR_INVALIDCALL if StreamNumber is not below
// caps::MaxStreams or if pSetting is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetStreamSourceFreq(UINT StreamNumber, UINT* pSetting) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(StreamNumber >= caps::MaxStreams || pSetting == nullptr))
    return D3DERR_INVALIDCALL;

  *pSetting = m_state.streamFreq[StreamNumber];

  return D3D_OK;
}
// Makes pIndexData the active index buffer (null unbinds).
// While a state block is recording, the call is forwarded to the
// recorder. The buffer is only re-bound if it actually changed.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetIndices(IDirect3DIndexBuffer9* pIndexData) {
  D3D9DeviceLock lock = LockDevice();

  D3D9IndexBuffer* indexBuffer = static_cast<D3D9IndexBuffer*>(pIndexData);

  if (unlikely(ShouldRecord()))
    return m_recorder->SetIndices(indexBuffer);

  if (indexBuffer != m_state.indices.ptr()) {
    m_state.indices = indexBuffer;
    BindIndices();
  }

  return D3D_OK;
}
// Returns a new reference to the active index buffer in *ppIndexData.
// Returns D3DERR_INVALIDCALL if ppIndexData is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetIndices(IDirect3DIndexBuffer9** ppIndexData) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(ppIndexData);

  if (unlikely(ppIndexData == nullptr))
    return D3DERR_INVALIDCALL;

  IDirect3DIndexBuffer9* bound = m_state.indices.ref();
  *ppIndexData = bound;

  return D3D_OK;
}
// Creates a pixel shader object from the token stream in pFunction.
//
// The return pointer is initialized up front. Returns
// D3DERR_INVALIDCALL if ppShader is null or if the shader module
// could not be created; on success *ppShader receives a new
// reference to the shader.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreatePixelShader(
  const DWORD*                  pFunction,
        IDirect3DPixelShader9** ppShader) {
  InitReturnPtr(ppShader);

  if (unlikely(ppShader == nullptr))
    return D3DERR_INVALIDCALL;

  DxsoModuleInfo moduleInfo;
  moduleInfo.options = m_dxsoOptions;

  D3D9CommonShader module;

  const HRESULT hr = this->CreateShaderModule(
    &module, VK_SHADER_STAGE_FRAGMENT_BIT, pFunction, &moduleInfo);

  // Any failure is reported to the caller as an invalid call.
  if (FAILED(hr))
    return D3DERR_INVALIDCALL;

  *ppShader = ref(new D3D9PixelShader(this, module));

  return D3D_OK;
}
// Makes pShader the active programmable pixel shader (null unbinds).
//
// While a state block is recording, the call is forwarded to the
// recorder. Re-binding the current shader is a no-op. Otherwise the
// pixel constant set is marked dirty when either shader needs
// constant copies, when there was no previous shader, or when the new
// shader reads a higher float/int/bool constant index than the old one.
// Active hazards are re-evaluated after every change.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShader(IDirect3DPixelShader9* pShader) {
  D3D9DeviceLock lock = LockDevice();

  D3D9PixelShader* shader = static_cast<D3D9PixelShader*>(pShader);

  if (unlikely(ShouldRecord()))
    return m_recorder->SetPixelShader(shader);

  if (shader == m_state.pixelShader.ptr())
    return D3D_OK;

  auto* oldShader = GetCommonShader(m_state.pixelShader);
  auto* newShader = GetCommonShader(shader);

  auto& constants = m_consts[DxsoProgramTypes::PixelShader];

  bool needsUpload = !oldShader
    || oldShader->GetMeta().needsConstantCopies
    || (newShader && newShader->GetMeta().needsConstantCopies);

  if (newShader && oldShader) {
    const auto& newMeta = newShader->GetMeta();
    const auto& oldMeta = oldShader->GetMeta();

    // The new shader reaches past the range the old one used.
    needsUpload = needsUpload
      || newMeta.maxConstIndexF > oldMeta.maxConstIndexF
      || newMeta.maxConstIndexI > oldMeta.maxConstIndexI
      || newMeta.maxConstIndexB > oldMeta.maxConstIndexB;
  }

  constants.dirty |= needsUpload;
  constants.meta   = newShader ? &newShader->GetMeta() : nullptr;

  m_state.pixelShader = shader;

  if (shader != nullptr) {
    m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);

    BindShader<DxsoProgramTypes::PixelShader>(
      GetCommonShader(shader),
      GetPixelShaderPermutation());
  }

  UpdateActiveHazards();

  return D3D_OK;
}
// Returns a new reference to the active pixel shader in *ppShader.
// Returns D3DERR_INVALIDCALL if ppShader is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShader(IDirect3DPixelShader9** ppShader) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(ppShader);

  if (unlikely(ppShader == nullptr))
    return D3DERR_INVALIDCALL;

  IDirect3DPixelShader9* bound = m_state.pixelShader.ref();
  *ppShader = bound;

  return D3D_OK;
}
// Writes float pixel shader constants, starting at StartRegister,
// by forwarding to SetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantF(
        UINT   StartRegister,
  const float* pConstantData,
        UINT   Vector4fCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::PixelShader;
  constexpr auto Constant = D3D9ConstantType::Float;

  return SetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, Vector4fCount);
}
// Reads float pixel shader constants, starting at StartRegister,
// by forwarding to GetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantF(
  UINT   StartRegister,
  float* pConstantData,
  UINT   Vector4fCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::PixelShader;
  constexpr auto Constant = D3D9ConstantType::Float;

  return GetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, Vector4fCount);
}
// Writes integer pixel shader constants, starting at StartRegister,
// by forwarding to SetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantI(
        UINT StartRegister,
  const int* pConstantData,
        UINT Vector4iCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::PixelShader;
  constexpr auto Constant = D3D9ConstantType::Int;

  return SetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, Vector4iCount);
}
// Reads integer pixel shader constants, starting at StartRegister,
// by forwarding to GetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantI(
  UINT StartRegister,
  int* pConstantData,
  UINT Vector4iCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::PixelShader;
  constexpr auto Constant = D3D9ConstantType::Int;

  return GetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, Vector4iCount);
}
// Writes boolean pixel shader constants, starting at StartRegister,
// by forwarding to SetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetPixelShaderConstantB(
        UINT  StartRegister,
  const BOOL* pConstantData,
        UINT  BoolCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::PixelShader;
  constexpr auto Constant = D3D9ConstantType::Bool;

  return SetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, BoolCount);
}
// Reads boolean pixel shader constants, starting at StartRegister,
// by forwarding to GetShaderConstants under the device lock.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetPixelShaderConstantB(
  UINT  StartRegister,
  BOOL* pConstantData,
  UINT  BoolCount) {
  D3D9DeviceLock lock = LockDevice();

  constexpr auto Stage    = DxsoProgramTypes::PixelShader;
  constexpr auto Constant = D3D9ConstantType::Bool;

  return GetShaderConstants<Stage, Constant>(
    StartRegister, pConstantData, BoolCount);
}
// Rectangular patches are not implemented: logs a warning the
// first time it is called and always returns D3DERR_INVALIDCALL.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawRectPatch(
        UINT               Handle,
  const float*             pNumSegs,
  const D3DRECTPATCH_INFO* pRectPatchInfo) {
  static bool s_errorShown = false;

  const bool alreadyWarned = std::exchange(s_errorShown, true);

  if (!alreadyWarned)
    Logger::warn("D3D9DeviceEx::DrawRectPatch: Stub");

  return D3DERR_INVALIDCALL;
}
// Triangular patches are not implemented: logs a warning the
// first time it is called and always returns D3DERR_INVALIDCALL.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawTriPatch(
        UINT              Handle,
  const float*            pNumSegs,
  const D3DTRIPATCH_INFO* pTriPatchInfo) {
  static bool s_errorShown = false;

  const bool alreadyWarned = std::exchange(s_errorShown, true);

  if (!alreadyWarned)
    Logger::warn("D3D9DeviceEx::DrawTriPatch: Stub");

  return D3DERR_INVALIDCALL;
}
// Patches are not implemented: logs a warning the first time
// it is called and always returns D3DERR_INVALIDCALL.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DeletePatch(UINT Handle) {
  static bool s_errorShown = false;

  const bool alreadyWarned = std::exchange(s_errorShown, true);

  if (!alreadyWarned)
    Logger::warn("D3D9DeviceEx::DeletePatch: Stub");

  return D3DERR_INVALIDCALL;
}
// Creates a query object of the given type, or only reports
// whether the type is supported when ppQuery is null.
//
// The result of D3D9Query::QuerySupported is returned as-is if it
// is anything but D3D_OK, and also when ppQuery is null. A DxvkError
// thrown during creation is logged and mapped to D3DERR_INVALIDCALL.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateQuery(D3DQUERYTYPE Type, IDirect3DQuery9** ppQuery) {
  InitReturnPtr(ppQuery);

  HRESULT support = D3D9Query::QuerySupported(Type);

  if (support != D3D_OK)
    return support;

  // Support check only, no object requested.
  if (ppQuery == nullptr)
    return support;

  try {
    *ppQuery = ref(new D3D9Query(this, Type));
    return D3D_OK;
  }
  catch (const DxvkError & e) {
    Logger::err(e.message());
    return D3DERR_INVALIDCALL;
  }
}
// Ex Methods
// Convolution mono kernels are unsupported: all arguments are
// ignored and the call always fails with D3DERR_INVALIDCALL.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetConvolutionMonoKernel(
UINT width,
UINT height,
float* rows,
float* columns) {
// We don't advertise support for this.
return D3DERR_INVALIDCALL;
}
// Stub: no composition is performed. All arguments are ignored,
// a warning is logged on every call and D3D_OK is returned.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ComposeRects(
IDirect3DSurface9* pSrc,
IDirect3DSurface9* pDst,
IDirect3DVertexBuffer9* pSrcRectDescs,
UINT NumRects,
IDirect3DVertexBuffer9* pDstRectDescs,
D3DCOMPOSERECTSOP Operation,
int Xoffset,
int Yoffset) {
Logger::warn("D3D9DeviceEx::ComposeRects: Stub");
return D3D_OK;
}
// Stub: logs a warning on every call and returns D3D_OK.
// NOTE: *pPriority is never written, so callers must not
// rely on the output value after this call.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetGPUThreadPriority(INT* pPriority) {
Logger::warn("D3D9DeviceEx::GetGPUThreadPriority: Stub");
return D3D_OK;
}
// Stub: the requested priority is ignored. Logs a
// warning on every call and returns D3D_OK.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetGPUThreadPriority(INT Priority) {
Logger::warn("D3D9DeviceEx::SetGPUThreadPriority: Stub");
return D3D_OK;
}
// Forwards the vblank wait to the swap chain with the given index.
// Returns D3DERR_INVALIDCALL if no such swap chain exists.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::WaitForVBlank(UINT iSwapChain) {
  D3D9DeviceLock lock = LockDevice();

  auto* target = GetInternalSwapchain(iSwapChain);

  if (target == nullptr)
    return D3DERR_INVALIDCALL;

  return target->WaitForVBlank();
}
// Stub: the resource array is not inspected. Logs a
// warning on every call and returns D3D_OK.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CheckResourceResidency(IDirect3DResource9** pResourceArray, UINT32 NumResources) {
Logger::warn("D3D9DeviceEx::CheckResourceResidency: Stub");
return D3D_OK;
}
// Stores the maximum frame latency requested by the application.
// A value of zero selects DefaultFrameLatency, and the result
// is clamped to at most MaxFrameLatency. Always returns D3D_OK.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::SetMaximumFrameLatency(UINT MaxLatency) {
  D3D9DeviceLock lock = LockDevice();

  UINT latency = MaxLatency;

  if (latency == 0)
    latency = DefaultFrameLatency;

  if (latency > MaxFrameLatency)
    latency = MaxFrameLatency;

  m_frameLatency = latency;

  return D3D_OK;
}
// Writes the stored maximum frame latency to *pMaxLatency.
// Returns D3DERR_INVALIDCALL if pMaxLatency is null.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetMaximumFrameLatency(UINT* pMaxLatency) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(pMaxLatency == nullptr))
    return D3DERR_INVALIDCALL;

  const UINT latency = m_frameLatency;
  *pMaxLatency = latency;

  return D3D_OK;
}
// Always reports D3D_OK; the destination window
// is not inspected by this implementation.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CheckDeviceState(HWND hDestinationWindow) {
return D3D_OK;
}
// Presents through the first (index 0) swap chain, forwarding all
// arguments unchanged to its Present method.
//
// GetInternalSwapchain returns null for an out-of-range index, so
// the result is checked before use, matching WaitForVBlank and
// GetDisplayModeEx. Returns D3DERR_INVALIDCALL if the device has
// no swap chain at index 0.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::PresentEx(
  const RECT*    pSourceRect,
  const RECT*    pDestRect,
        HWND     hDestWindowOverride,
  const RGNDATA* pDirtyRegion,
        DWORD    dwFlags) {
  D3D9DeviceLock lock = LockDevice();

  if (auto* swapchain = GetInternalSwapchain(0)) {
    return swapchain->Present(
      pSourceRect,
      pDestRect,
      hDestWindowOverride,
      pDirtyRegion,
      dwFlags);
  }

  return D3DERR_INVALIDCALL;
}
// Creates a single-level render target surface in D3DPOOL_DEFAULT.
//
// D3DUSAGE_RENDERTARGET is always added to Usage. The Lockable
// argument is not consulted here, and pSharedHandle is only passed
// to InitReturnPtr. Returns D3DERR_INVALIDCALL if ppSurface is null
// or the description fails NormalizeTextureProperties, and
// D3DERR_OUTOFVIDEOMEMORY if surface creation throws a DxvkError.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateRenderTargetEx(
  UINT                Width,
  UINT                Height,
  D3DFORMAT           Format,
  D3DMULTISAMPLE_TYPE MultiSample,
  DWORD               MultisampleQuality,
  BOOL                Lockable,
  IDirect3DSurface9** ppSurface,
  HANDLE*             pSharedHandle,
  DWORD               Usage) {
  InitReturnPtr(ppSurface);
  InitReturnPtr(pSharedHandle);

  if (unlikely(ppSurface == nullptr))
    return D3DERR_INVALIDCALL;

  D3D9_COMMON_TEXTURE_DESC desc;

  // Dimensions: a plain 2D surface without mips or array layers
  desc.Width              = Width;
  desc.Height             = Height;
  desc.Depth              = 1;
  desc.ArraySize          = 1;
  desc.MipLevels          = 1;

  // Format and placement
  desc.Format             = EnumerateFormat(Format);
  desc.Usage              = Usage | D3DUSAGE_RENDERTARGET;
  desc.Pool               = D3DPOOL_DEFAULT;
  desc.Discard            = FALSE;

  // Multisampling
  desc.MultiSample        = MultiSample;
  desc.MultisampleQuality = MultisampleQuality;

  if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
    return D3DERR_INVALIDCALL;

  try {
    const Com<D3D9Surface> created = new D3D9Surface(this, &desc);
    m_initializer->InitTexture(created->GetCommonTexture());
    *ppSurface = created.ref();
    return D3D_OK;
  }
  catch (const DxvkError& e) {
    Logger::err(e.message());
    return D3DERR_OUTOFVIDEOMEMORY;
  }
}
// Creates a single-level, non-multisampled off-screen surface
// in the requested pool, with Usage passed through unchanged.
//
// pSharedHandle is only passed to InitReturnPtr. Returns
// D3DERR_INVALIDCALL if ppSurface is null or the description fails
// NormalizeTextureProperties, and D3DERR_OUTOFVIDEOMEMORY if surface
// creation throws a DxvkError.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateOffscreenPlainSurfaceEx(
  UINT                Width,
  UINT                Height,
  D3DFORMAT           Format,
  D3DPOOL             Pool,
  IDirect3DSurface9** ppSurface,
  HANDLE*             pSharedHandle,
  DWORD               Usage) {
  InitReturnPtr(ppSurface);
  InitReturnPtr(pSharedHandle);

  if (unlikely(ppSurface == nullptr))
    return D3DERR_INVALIDCALL;

  D3D9_COMMON_TEXTURE_DESC desc;

  // Dimensions: a plain 2D surface without mips or array layers
  desc.Width              = Width;
  desc.Height             = Height;
  desc.Depth              = 1;
  desc.ArraySize          = 1;
  desc.MipLevels          = 1;

  // Format and placement
  desc.Format             = EnumerateFormat(Format);
  desc.Usage              = Usage;
  desc.Pool               = Pool;
  desc.Discard            = FALSE;

  // Off-screen plain surfaces are never multisampled
  desc.MultiSample        = D3DMULTISAMPLE_NONE;
  desc.MultisampleQuality = 0;

  if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
    return D3DERR_INVALIDCALL;

  try {
    const Com<D3D9Surface> created = new D3D9Surface(this, &desc);
    m_initializer->InitTexture(created->GetCommonTexture());
    *ppSurface = created.ref();
    return D3D_OK;
  }
  catch (const DxvkError& e) {
    Logger::err(e.message());
    return D3DERR_OUTOFVIDEOMEMORY;
  }
}
// Creates a single-level depth-stencil surface in D3DPOOL_DEFAULT.
//
// D3DUSAGE_DEPTHSTENCIL is always added to Usage and the Discard
// flag is stored in the description. pSharedHandle is only passed
// to InitReturnPtr. Returns D3DERR_INVALIDCALL if ppSurface is null
// or the description fails NormalizeTextureProperties, and
// D3DERR_OUTOFVIDEOMEMORY if surface creation throws a DxvkError.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateDepthStencilSurfaceEx(
  UINT                Width,
  UINT                Height,
  D3DFORMAT           Format,
  D3DMULTISAMPLE_TYPE MultiSample,
  DWORD               MultisampleQuality,
  BOOL                Discard,
  IDirect3DSurface9** ppSurface,
  HANDLE*             pSharedHandle,
  DWORD               Usage) {
  InitReturnPtr(ppSurface);
  InitReturnPtr(pSharedHandle);

  if (unlikely(ppSurface == nullptr))
    return D3DERR_INVALIDCALL;

  D3D9_COMMON_TEXTURE_DESC desc;

  // Dimensions: a plain 2D surface without mips or array layers
  desc.Width              = Width;
  desc.Height             = Height;
  desc.Depth              = 1;
  desc.ArraySize          = 1;
  desc.MipLevels          = 1;

  // Format and placement
  desc.Format             = EnumerateFormat(Format);
  desc.Usage              = Usage | D3DUSAGE_DEPTHSTENCIL;
  desc.Pool               = D3DPOOL_DEFAULT;
  desc.Discard            = Discard;

  // Multisampling
  desc.MultiSample        = MultiSample;
  desc.MultisampleQuality = MultisampleQuality;

  if (FAILED(D3D9CommonTexture::NormalizeTextureProperties(this, &desc)))
    return D3DERR_INVALIDCALL;

  try {
    const Com<D3D9Surface> created = new D3D9Surface(this, &desc);
    m_initializer->InitTexture(created->GetCommonTexture());
    *ppSurface = created.ref();
    return D3D_OK;
  }
  catch (const DxvkError& e) {
    Logger::err(e.message());
    return D3DERR_OUTOFVIDEOMEMORY;
  }
}
// Resets the swap chain with new presentation parameters and an
// optional fullscreen display mode. A failure code from
// ResetSwapChain is returned unchanged; any success code is
// reported as plain D3D_OK.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::ResetEx(
  D3DPRESENT_PARAMETERS* pPresentationParameters,
  D3DDISPLAYMODEEX*      pFullscreenDisplayMode) {
  D3D9DeviceLock lock = LockDevice();

  const HRESULT hr = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode);

  return FAILED(hr) ? hr : D3D_OK;
}
// Forwards the display mode query to the swap chain with the given
// index. Returns D3DERR_INVALIDCALL if no such swap chain exists.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::GetDisplayModeEx(
  UINT                iSwapChain,
  D3DDISPLAYMODEEX*   pMode,
  D3DDISPLAYROTATION* pRotation) {
  D3D9DeviceLock lock = LockDevice();

  auto* target = GetInternalSwapchain(iSwapChain);

  if (target == nullptr)
    return D3DERR_INVALIDCALL;

  return target->GetDisplayModeEx(pMode, pRotation);
}
// Creates an additional swap chain and registers it with the device.
//
// Before creation, Invalidate is called on every existing swap chain
// with the new chain's device window. The new swap chain is returned
// with a public reference in *ppSwapChain and additionally receives
// a private reference for its entry in m_swapchains. Returns
// D3DERR_INVALIDCALL if ppSwapChain or pPresentationParameters is
// null, and D3DERR_NOTAVAILABLE if creation throws a DxvkError.
HRESULT STDMETHODCALLTYPE D3D9DeviceEx::CreateAdditionalSwapChainEx(
        D3DPRESENT_PARAMETERS* pPresentationParameters,
  const D3DDISPLAYMODEEX*      pFullscreenDisplayMode,
        IDirect3DSwapChain9**  ppSwapChain) {
  D3D9DeviceLock lock = LockDevice();

  InitReturnPtr(ppSwapChain);

  if (ppSwapChain == nullptr || pPresentationParameters == nullptr)
    return D3DERR_INVALIDCALL;

  for (auto& existing : m_swapchains)
    existing.ptr()->Invalidate(pPresentationParameters->hDeviceWindow);

  try {
    auto* created = new D3D9SwapChainEx(this, pPresentationParameters, pFullscreenDisplayMode);

    *ppSwapChain = ref(created);

    m_swapchains.push_back(created);
    created->AddRefPrivate();

    return D3D_OK;
  }
  catch (const DxvkError & e) {
    Logger::err(e.message());
    return D3DERR_NOTAVAILABLE;
  }
}
// Stores one sampler state value for the sampler at index StateSampler.
//
// While a state block is recording, the call is forwarded to the
// recorder. If the value actually changes, addressing, filtering,
// anisotropy, LOD bias, max mip level and border color states mark
// the sampler dirty in m_dirtySamplerStates, whereas a change of
// D3DSAMP_SRGBTEXTURE re-binds the texture. Other state types are
// only stored. Always returns D3D_OK on the non-recording path.
HRESULT D3D9DeviceEx::SetStateSamplerState(
  DWORD               StateSampler,
  D3DSAMPLERSTATETYPE Type,
  DWORD               Value) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(ShouldRecord()))
    return m_recorder->SetStateSamplerState(StateSampler, Type, Value);

  auto& samplers = m_state.samplerStates;

  // Redundant state sets are expected to be rare.
  if (unlikely(samplers[StateSampler][Type] == Value))
    return D3D_OK;

  samplers[StateSampler][Type] = Value;

  switch (Type) {
    case D3DSAMP_ADDRESSU:
    case D3DSAMP_ADDRESSV:
    case D3DSAMP_ADDRESSW:
    case D3DSAMP_MAGFILTER:
    case D3DSAMP_MINFILTER:
    case D3DSAMP_MIPFILTER:
    case D3DSAMP_MAXANISOTROPY:
    case D3DSAMP_MIPMAPLODBIAS:
    case D3DSAMP_MAXMIPLEVEL:
    case D3DSAMP_BORDERCOLOR:
      m_dirtySamplerStates |= 1u << StateSampler;
      break;

    case D3DSAMP_SRGBTEXTURE:
      BindTexture(StateSampler);
      break;

    default:
      break;
  }

  return D3D_OK;
}
// Binds pTexture to the sampler at index StateSampler (null unbinds).
//
// While a state block is recording, the call is forwarded to the
// recorder. Re-binding the current texture is a no-op. StateSampler
// is used to index m_state.textures directly without a range check
// here, so it is presumably validated by the caller.
HRESULT D3D9DeviceEx::SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(ShouldRecord()))
    return m_recorder->SetStateTexture(StateSampler, pTexture);

  auto& boundTexture = m_state.textures[StateSampler];

  if (boundTexture == pTexture)
    return D3D_OK;

  // A transition between a null and a valid texture means the
  // fixed-function texture stages have to be re-evaluated, so
  // the fixed-function pixel shader needs to be rebuilt.
  const bool nullTransition = pTexture == nullptr || boundTexture == nullptr;

  if (nullTransition)
    m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);

  TextureChangePrivate(boundTexture, pTexture);

  BindTexture(StateSampler);

  // Only pixel shader samplers are tracked here
  if (likely(StateSampler <= caps::MaxSamplers))
    UpdateActiveRTTextures(StateSampler);

  return D3D_OK;
}
// Stores a transform matrix in the slot with index idx.
//
// While a state block is recording, the call is forwarded to the
// recorder. Otherwise the fixed-function vertex data is always
// dirtied, and the vertex blend data is dirtied as well when the
// view matrix or any slot at or above the D3DTS_WORLD index changes.
HRESULT D3D9DeviceEx::SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(ShouldRecord()))
    return m_recorder->SetStateTransform(idx, pMatrix);

  m_state.transforms[idx] = ConvertMatrix(pMatrix);

  m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);

  const bool affectsVertexBlend = idx == GetTransformIndex(D3DTS_VIEW)
                               || idx >= GetTransformIndex(D3DTS_WORLD);

  if (affectsVertexBlend)
    m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend);

  return D3D_OK;
}
// Reports whether the parent interface is extended,
// as answered by m_parent->IsExtended().
bool D3D9DeviceEx::IsExtended() {
return m_parent->IsExtended();
}
// Software vertex processing support is tied to whether the
// vertexPipelineStoresAndAtomics feature is enabled on the device.
bool D3D9DeviceEx::SupportsSWVP() {
return m_dxvkDevice->features().core.features.vertexPipelineStoresAndAtomics;
}
// Returns the window handle stored in m_window, which the
// constructor initializes from its hFocusWindow argument.
HWND D3D9DeviceEx::GetWindow() {
return m_window;
}
// Builds the set of Vulkan device features to enable for D3D9.
//
// Features assigned VK_TRUE are requested unconditionally, while
// features copied from the adapter's feature set are only enabled
// when the adapter reports them.
DxvkDeviceFeatures D3D9DeviceEx::GetDeviceFeatures(const Rc<DxvkAdapter>& adapter) {
  DxvkDeviceFeatures supported = adapter->features();
  DxvkDeviceFeatures enabled   = {};

  // Shorthands for the core feature structs
  auto&       core          = enabled.core.features;
  const auto& supportedCore = supported.core.features;

  // Geometry shaders are used for some meta ops
  core.geometryShader     = VK_TRUE;
  core.robustBufferAccess = VK_TRUE;

  enabled.extMemoryPriority.memoryPriority
    = supported.extMemoryPriority.memoryPriority;

  enabled.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation
    = supported.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation;

  enabled.extVertexAttributeDivisor.vertexAttributeInstanceRateDivisor
    = supported.extVertexAttributeDivisor.vertexAttributeInstanceRateDivisor;
  enabled.extVertexAttributeDivisor.vertexAttributeInstanceRateZeroDivisor
    = supported.extVertexAttributeDivisor.vertexAttributeInstanceRateZeroDivisor;

  // ProcessVertices
  core.vertexPipelineStoresAndAtomics = supportedCore.vertexPipelineStoresAndAtomics;

  // DXVK Meta
  core.shaderStorageImageWriteWithoutFormat = VK_TRUE;
  core.shaderStorageImageExtendedFormats    = VK_TRUE;
  core.imageCubeArray                       = VK_TRUE;

  // SM1 level hardware
  core.depthClamp              = VK_TRUE;
  core.depthBiasClamp          = VK_TRUE;
  core.fillModeNonSolid        = VK_TRUE;
  core.pipelineStatisticsQuery = supportedCore.pipelineStatisticsQuery;
  core.sampleRateShading       = VK_TRUE;
  core.samplerAnisotropy       = VK_TRUE;
  core.shaderClipDistance      = VK_TRUE;
  core.shaderCullDistance      = VK_TRUE;

  // Ensure we support real BC formats and unofficial vendor ones.
  core.textureCompressionBC = VK_TRUE;

  enabled.extDepthClipEnable.depthClipEnable
    = supported.extDepthClipEnable.depthClipEnable;
  enabled.extHostQueryReset.hostQueryReset
    = supported.extHostQueryReset.hostQueryReset;

  // SM2 level hardware
  core.occlusionQueryPrecise = VK_TRUE;

  // SM3 level hardware
  core.multiViewport    = VK_TRUE;
  core.independentBlend = VK_TRUE;

  // D3D10 level hardware supports this in D3D9 native.
  core.fullDrawIndexUint32 = VK_TRUE;

  return enabled;
}
// Initializes the constant buffer layouts for both shader stages.
//
// With software vertex processing available, the vertex stage takes
// its float/int/bool counts from the configured SWVP options,
// otherwise from the caps limits. The pixel stage always uses the
// caps limits. Bool constants are packed 32 per bitmask word.
void D3D9DeviceEx::DetermineConstantLayouts(bool canSWVP) {
  if (canSWVP) {
    m_vsLayout.floatCount = uint32_t(m_d3d9Options.swvpFloatCount);
    m_vsLayout.intCount   = uint32_t(m_d3d9Options.swvpIntCount);
    m_vsLayout.boolCount  = uint32_t(m_d3d9Options.swvpBoolCount);
  } else {
    m_vsLayout.floatCount = caps::MaxFloatConstantsVS;
    m_vsLayout.intCount   = caps::MaxOtherConstants;
    m_vsLayout.boolCount  = caps::MaxOtherConstants;
  }

  m_vsLayout.bitmaskCount = align(m_vsLayout.boolCount, 32) / 32;

  m_psLayout.floatCount   = caps::MaxFloatConstantsPS;
  m_psLayout.intCount     = caps::MaxOtherConstants;
  m_psLayout.boolCount    = caps::MaxOtherConstants;
  m_psLayout.bitmaskCount = align(m_psLayout.boolCount, 32) / 32;
}
// Allocates a host-visible slice of `size` bytes that is usable as a
// vertex or index buffer, and returns it together with its CPU pointer.
//
// Requests of up to DefaultSize (1 MiB) are sub-allocated linearly from
// the shared m_upBuffer; larger requests get a dedicated buffer.
D3D9UPBufferSlice D3D9DeviceEx::AllocUpBuffer(VkDeviceSize size) {
constexpr VkDeviceSize DefaultSize = 1 << 20;
constexpr VkMemoryPropertyFlags memoryFlags
= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
| VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
| VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
if (size <= DefaultSize) {
if (unlikely(!m_upBuffer.slice.defined())) {
// First use: create the shared buffer and remember its mapping.
DxvkBufferCreateInfo info;
info.size = DefaultSize;
info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT
| VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
info.access = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
| VK_ACCESS_INDEX_READ_BIT;
info.stages = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
m_upBuffer.slice = DxvkBufferSlice(m_dxvkDevice->createBuffer(info, memoryFlags));
m_upBuffer.mapPtr = m_upBuffer.slice.mapPtr(0);
} else if (unlikely(m_upBuffer.slice.length() < size)) {
// Not enough room left in the current backing storage: allocate a
// new physical slice for the buffer, reset the tracked slice to
// cover the whole buffer again, and let the CS thread swap the
// backing storage via invalidateBuffer.
auto physSlice = m_upBuffer.slice.buffer()->allocSlice();
m_upBuffer.slice = DxvkBufferSlice(m_upBuffer.slice.buffer());
m_upBuffer.mapPtr = physSlice.mapPtr;
EmitCs([
cBuffer = m_upBuffer.slice.buffer(),
cSlice = physSlice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cBuffer, cSlice);
});
}
// Hand out the first `size` bytes of the remaining range.
D3D9UPBufferSlice result;
result.slice = m_upBuffer.slice.subSlice(0, size);
result.mapPtr = reinterpret_cast<char*>(m_upBuffer.mapPtr) + m_upBuffer.slice.offset();
// Advance the remaining range by the size rounded up to a multiple
// of CACHE_LINE_SIZE, so the next allocation starts aligned.
VkDeviceSize adjust = align(size, CACHE_LINE_SIZE);
m_upBuffer.slice = m_upBuffer.slice.subSlice(adjust, m_upBuffer.slice.length() - adjust);
return result;
} else {
// Create a temporary buffer for very large allocations
DxvkBufferCreateInfo info;
info.size = size;
info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT
| VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
info.access = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
| VK_ACCESS_INDEX_READ_BIT;
info.stages = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
D3D9UPBufferSlice result;
result.slice = DxvkBufferSlice(m_dxvkDevice->createBuffer(info, memoryFlags));
result.mapPtr = result.slice.mapPtr(0);
return result;
}
}
// Looks up a swap chain by index in m_swapchains.
// Returns null if the index is out of range.
D3D9SwapChainEx* D3D9DeviceEx::GetInternalSwapchain(UINT index) {
  return unlikely(index >= m_swapchains.size())
    ? nullptr
    : m_swapchains[index].ptr();
}
// Returns true if state changes should go to the state block recorder:
// a recorder must exist and must not currently be applying its state.
bool D3D9DeviceEx::ShouldRecord() {
  if (m_recorder == nullptr)
    return false;

  return !m_recorder->IsApplying();
}
// Returns the Vulkan format mapping for a D3D9 format,
// as provided by the adapter this device was created on.
D3D9_VK_FORMAT_MAPPING D3D9DeviceEx::LookupFormat(
D3D9Format Format) const {
return m_adapter->GetFormatMapping(Format);
}
// Returns the format info that the adapter reports for an
// unsupported D3D9 format, via GetUnsupportedFormatInfo.
DxvkFormatInfo D3D9DeviceEx::UnsupportedFormatInfo(
D3D9Format Format) const {
return m_adapter->GetUnsupportedFormatInfo(Format);
}
// Waits until the given resource is no longer in use and can be mapped.
//
// Returns true once the resource is idle. Returns false, without
// waiting, if the resource is still busy and D3DLOCK_DONOTWAIT is
// set in MapFlags; pending commands are flushed in that case.
bool D3D9DeviceEx::WaitForResource(
  const Rc<DxvkResource>& Resource,
        DWORD             MapFlags) {
  // Determine the access type to wait for based on the map mode
  DxvkAccess access = (MapFlags & D3DLOCK_READONLY)
    ? DxvkAccess::Write
    : DxvkAccess::Read;

  // Wait for any pending D3D9 command to be executed on the
  // CS thread so that we can determine whether the resource
  // is currently in use or not.
  if (!Resource->isInUse(access))
    SynchronizeCsThread();

  if (!Resource->isInUse(access))
    return true;

  if (MapFlags & D3DLOCK_DONOTWAIT) {
    // We don't have to wait, but misbehaving games may
    // still try to spin on `Map` until the resource is
    // idle, so we should flush pending commands
    FlushImplicit(FALSE);
    return false;
  }

  // Make sure pending commands using the resource get
  // executed on the GPU if we have to wait for it
  Flush();
  SynchronizeCsThread();

  while (Resource->isInUse(access))
    dxvk::this_thread::yield();

  return true;
}
// Computes the byte offset of the front-top-left corner of a lock
// box inside a mapped image, given its slice and row pitch.
//
// Returns 0 if pBox is null. If FormatInfo is given, the box
// coordinates are first divided by the format's block dimensions
// and columns are scaled by the format's element size; without it,
// coordinates are used as-is with an element size of one byte.
uint32_t D3D9DeviceEx::CalcImageLockOffset(
        uint32_t        SlicePitch,
        uint32_t        RowPitch,
  const DxvkFormatInfo* FormatInfo,
  const D3DBOX*         pBox) {
  if (pBox == nullptr)
    return 0;

  uint32_t slice  = pBox->Front;
  uint32_t row    = pBox->Top;
  uint32_t column = pBox->Left;

  uint32_t elementSize = 1;

  if (FormatInfo != nullptr) {
    elementSize = FormatInfo->elementSize;

    // Convert texel coordinates to block coordinates
    slice  /= FormatInfo->blockSize.depth;
    row    /= FormatInfo->blockSize.height;
    column /= FormatInfo->blockSize.width;
  }

  return slice  * SlicePitch
       + row    * RowPitch
       + column * elementSize;
}
HRESULT D3D9DeviceEx::LockImage(
D3D9CommonTexture* pResource,
UINT Face,
UINT MipLevel,
D3DLOCKED_BOX* pLockedBox,
const D3DBOX* pBox,
DWORD Flags) {
D3D9DeviceLock lock = LockDevice();
UINT Subresource = pResource->CalcSubresource(Face, MipLevel);
// Don't allow multiple lockings.
if (unlikely(pResource->MarkLocked(Subresource, true)))
return D3DERR_INVALIDCALL;
if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_READONLY)) == (D3DLOCK_DISCARD | D3DLOCK_READONLY)))
return D3DERR_INVALIDCALL;
if (unlikely(!m_d3d9Options.allowLockFlagReadonly))
Flags &= ~D3DLOCK_READONLY;
if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)))
Flags &= ~D3DLOCK_DISCARD;
auto& desc = *(pResource->Desc());
bool alloced = pResource->CreateBufferSubresource(Subresource);
const Rc<DxvkBuffer> mappedBuffer = pResource->GetBuffer(Subresource);
auto formatInfo = imageFormatInfo(pResource->GetFormatMapping().FormatColor);
auto subresource = pResource->GetSubresourceFromIndex(
formatInfo->aspectMask, Subresource);
VkExtent3D levelExtent = pResource->GetExtentMip(MipLevel);
VkExtent3D blockCount = util::computeBlockCount(levelExtent, formatInfo->blockSize);
const bool systemmem = desc.Pool == D3DPOOL_SYSTEMMEM;
const bool managed = IsPoolManaged(desc.Pool);
const bool scratch = desc.Pool == D3DPOOL_SCRATCH;
bool fullResource = pBox == nullptr;
if (unlikely(!fullResource)) {
VkOffset3D lockOffset;
VkExtent3D lockExtent;
ConvertBox(*pBox, lockOffset, lockExtent);
fullResource = lockOffset == VkOffset3D{ 0, 0, 0 }
&& lockExtent.width >= levelExtent.width
&& lockExtent.height >= levelExtent.height
&& lockExtent.depth >= levelExtent.depth;
}
// If we are not locking the entire image
// a partial discard is meant to occur.
// We can't really implement that, so just ignore discard
// if we are not locking the full resource
// DISCARD is also ignored for MANAGED and SYSTEMEM.
// DISCARD is not ignored for non-DYNAMIC unlike what the docs say.
if (!fullResource || desc.Pool != D3DPOOL_DEFAULT)
Flags &= ~D3DLOCK_DISCARD;
if (desc.Usage & D3DUSAGE_WRITEONLY)
Flags &= ~D3DLOCK_READONLY;
pResource->SetLockFlags(Subresource, Flags);
DxvkBufferSliceHandle physSlice;
if (Flags & D3DLOCK_DISCARD) {
// We do not have to preserve the contents of the
// buffer if the entire image gets discarded.
physSlice = pResource->DiscardMapSlice(Subresource);
EmitCs([
cImageBuffer = mappedBuffer,
cBufferSlice = physSlice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cImageBuffer, cBufferSlice);
});
}
else if (managed || scratch || systemmem) {
// Managed and scratch resources
// are meant to be able to provide readback without waiting.
// We always keep a copy of them in system memory for this reason.
// No need to wait as its not in use.
physSlice = pResource->GetMappedSlice(Subresource);
// We do not need to wait for the resource in the event the
// calling app promises not to overwrite data that is in use
// or is reading. Remember! This will only trigger for MANAGED resources
// that cannot get affected by GPU, therefore readonly is A-OK for NOT waiting.
const bool readOnly = Flags & D3DLOCK_READONLY;
const bool skipWait = (readOnly && managed) || scratch || (readOnly && systemmem);
if (alloced)
std::memset(physSlice.mapPtr, 0, physSlice.length);
else if (!skipWait) {
if (!WaitForResource(mappedBuffer, Flags))
return D3DERR_WASSTILLDRAWING;
}
}
else {
bool renderable = desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL | D3DUSAGE_AUTOGENMIPMAP);
// If we are dirty, then we need to copy -> buffer
// We are also always dirty if we are a render target,
// a depth stencil, or auto generate mipmaps.
bool dirty = pResource->SetDirty(Subresource, false) || renderable;
if (unlikely(dirty)) {
Rc<DxvkImage> resourceImage = pResource->GetImage();
Rc<DxvkImage> mappedImage = resourceImage->info().sampleCount != 1
? pResource->GetResolveImage()
: std::move(resourceImage);
// When using any map mode which requires the image contents
// to be preserved, and if the GPU has write access to the
// image, copy the current image contents into the buffer.
auto subresourceLayers = vk::makeSubresourceLayers(subresource);
// We need to resolve this, some games
// lock MSAA render targets even though
// that's entirely illegal and they explicitly
// tell us that they do NOT want to lock them...
if (resourceImage != nullptr) {
EmitCs([
cMainImage = resourceImage,
cResolveImage = mappedImage,
cSubresource = subresourceLayers
] (DxvkContext* ctx) {
VkImageResolve region;
region.srcSubresource = cSubresource;
region.srcOffset = VkOffset3D { 0, 0, 0 };
region.dstSubresource = cSubresource;
region.dstOffset = VkOffset3D { 0, 0, 0 };
region.extent = cMainImage->mipLevelExtent(cSubresource.mipLevel);
if (cSubresource.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
ctx->resolveImage(
cResolveImage, cMainImage, region,
cMainImage->info().format);
}
else {
ctx->resolveDepthStencilImage(
cResolveImage, cMainImage, region,
VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
}
});
}
VkFormat packedFormat = GetPackedDepthStencilFormat(desc.Format);
EmitCs([
cImageBuffer = mappedBuffer,
cImage = std::move(mappedImage),
cSubresources = subresourceLayers,
cLevelExtent = levelExtent,
cPackedFormat = packedFormat
] (DxvkContext* ctx) {
if (cSubresources.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
ctx->copyImageToBuffer(
cImageBuffer, 0, VkExtent2D { 0u, 0u },
cImage, cSubresources, VkOffset3D { 0, 0, 0 },
cLevelExtent);
} else {
ctx->copyDepthStencilImageToPackedBuffer(
cImageBuffer, 0, cImage, cSubresources,
VkOffset2D { 0, 0 },
VkExtent2D { cLevelExtent.width, cLevelExtent.height },
cPackedFormat);
}
});
}
physSlice = mappedBuffer->getSliceHandle();
// If we are a new alloc, and we weren't dirty
// that means that we are a newly initialized
// texture, and hence can just memset -> 0 and
// avoid a wait here.
if (alloced && !dirty)
std::memset(physSlice.mapPtr, 0, physSlice.length);
else {
if (!WaitForResource(mappedBuffer, Flags))
return D3DERR_WASSTILLDRAWING;
}
}
const bool atiHack = desc.Format == D3D9Format::ATI1 || desc.Format == D3D9Format::ATI2;
// Set up map pointer.
if (atiHack) {
// We need to lie here. The game is expected to use this info and do a workaround.
// It's stupid. I know.
pLockedBox->RowPitch = std::max(desc.Width >> MipLevel, 1u);
pLockedBox->SlicePitch = pLockedBox->RowPitch * std::max(desc.Height >> MipLevel, 1u);
}
else {
// Data is tightly packed within the mapped buffer.
pLockedBox->RowPitch = formatInfo->elementSize * blockCount.width;
pLockedBox->SlicePitch = formatInfo->elementSize * blockCount.width * blockCount.height;
}
const uint32_t offset = CalcImageLockOffset(
pLockedBox->SlicePitch,
pLockedBox->RowPitch,
(!atiHack) ? formatInfo : nullptr,
pBox);
uint8_t* data = reinterpret_cast<uint8_t*>(physSlice.mapPtr);
data += offset;
pLockedBox->pBits = data;
return D3D_OK;
}
// Releases the lock on one face/mip subresource of a texture.
//
// For buffer-backed textures, a lock that was not read-only is followed by
// a buffer -> image upload. The per-subresource buffer is then destroyed
// unless the texture is dynamic, or is managed and eviction on unlock is
// disabled. Textures with automatic mip generation get their mips rebuilt.
//
// Returns D3DERR_INVALIDCALL if the subresource was not locked,
// D3D_OK otherwise.
HRESULT D3D9DeviceEx::UnlockImage(
        D3D9CommonTexture*      pResource,
        UINT                    Face,
        UINT                    MipLevel) {
  D3D9DeviceLock lock = LockDevice();

  const UINT subresourceIdx = pResource->CalcSubresource(Face, MipLevel);

  // Clearing the locked state fails if we were never locked.
  if (unlikely(!pResource->MarkLocked(subresourceIdx, false)))
    return D3DERR_INVALIDCALL;

  const bool readOnlyLock = (pResource->GetLockFlags(subresourceIdx) & D3DLOCK_READONLY) != 0;
  const bool bufferBacked = pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED;

  // A writable lock leaves a pending copy, but there is only
  // something to copy into if an actual image exists.
  if (!readOnlyLock && bufferBacked)
    this->FlushImage(pResource, subresourceIdx);

  if (bufferBacked) {
    const bool keepBuffer = pResource->IsDynamic()
      || (pResource->IsManaged() && !m_d3d9Options.evictManagedOnUnlock);

    if (!keepBuffer)
      pResource->DestroyBufferSubresource(subresourceIdx);
  }

  if (pResource->IsAutomaticMip())
    GenerateMips(pResource);

  return D3D_OK;
}
// Uploads the contents of a subresource's mapping buffer into the
// texture's image. Regular formats are copied directly on the CS thread;
// video formats are routed through the format converter instead.
// Always returns D3D_OK.
HRESULT D3D9DeviceEx::FlushImage(
        D3D9CommonTexture*      pResource,
        UINT                    Subresource) {
  const Rc<DxvkImage>  dstImage  = pResource->GetImage();

  // The buffer holds the data the application wrote while locked.
  const Rc<DxvkBuffer> srcBuffer = pResource->GetBuffer(Subresource);

  auto formatInfo = imageFormatInfo(dstImage->info().format);
  auto subres     = pResource->GetSubresourceFromIndex(
    formatInfo->aspectMask, Subresource);

  VkExtent3D mipExtent = dstImage->mipLevelExtent(subres.mipLevel);

  // Single array layer of the selected mip level.
  VkImageSubresourceLayers dstLayers;
  dstLayers.aspectMask     = subres.aspectMask;
  dstLayers.mipLevel       = subres.mipLevel;
  dstLayers.baseArrayLayer = subres.arrayLayer;
  dstLayers.layerCount     = 1;

  auto videoFormat = pResource->GetFormatMapping().VideoFormatInfo;

  if (unlikely(videoFormat.FormatType != D3D9VideoFormat_None)) {
    m_converter->ConvertVideoFormat(
      videoFormat,
      dstImage, dstLayers,
      srcBuffer);

    return D3D_OK;
  }

  EmitCs([
    cBuffer = srcBuffer,
    cImage  = dstImage,
    cLayers = dstLayers,
    cExtent = mipExtent
  ] (DxvkContext* ctx) {
    ctx->copyBufferToImage(cImage, cLayers,
      VkOffset3D{ 0, 0, 0 }, cExtent,
      cBuffer, 0, { 0u, 0u });
  });

  return D3D_OK;
}
// Queues mipmap generation on the CS thread for the
// color sample view of the given texture.
void D3D9DeviceEx::GenerateMips(
        D3D9CommonTexture*      pResource) {
  auto colorView = pResource->GetSampleView().Color;

  EmitCs([
    cView = std::move(colorView)
  ] (DxvkContext* ctx) {
    ctx->generateMipmaps(cView);
  });
}
// Maps a vertex/index buffer for CPU access.
//
// pResource:    buffer to lock.
// OffsetToLock: byte offset added to the returned pointer (not clamped).
// SizeToLock:   byte size of the locked range; 0 is treated as "whole buffer"
//               for the purposes of range tracking.
// ppbData:      receives the pointer into the mapped slice.
// Flags:        D3DLOCK_* flags; several are masked off below depending on
//               the pool and usage of the buffer.
//
// Returns D3DERR_INVALIDCALL if ppbData is null, D3DERR_WASSTILLDRAWING if
// WaitForResource fails, and D3D_OK otherwise. On success the
// lock count of the resource is incremented.
HRESULT D3D9DeviceEx::LockBuffer(
D3D9CommonBuffer* pResource,
UINT OffsetToLock,
UINT SizeToLock,
void** ppbData,
DWORD Flags) {
D3D9DeviceLock lock = LockDevice();
if (unlikely(ppbData == nullptr))
return D3DERR_INVALIDCALL;
// READONLY is only honored when the corresponding option allows it.
if (!m_d3d9Options.allowLockFlagReadonly)
Flags &= ~D3DLOCK_READONLY;
auto& desc = *pResource->Desc();
// Ignore DISCARD if NOOVERWRITE is set
if (unlikely((Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)) == (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)))
Flags &= ~D3DLOCK_DISCARD;
// Ignore DISCARD and NOOVERWRITE if the buffer is not DEFAULT pool (tests + Halo 2)
// The docs say DISCARD and NOOVERWRITE are ignored if the buffer is not DYNAMIC
// but tests say otherwise!
if (desc.Pool != D3DPOOL_DEFAULT)
Flags &= ~(D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE);
// Ignore READONLY if we are a WRITEONLY resource.
if (desc.Usage & D3DUSAGE_WRITEONLY)
Flags &= ~D3DLOCK_READONLY;
// Ignore DONOTWAIT if we are DYNAMIC
// Yes... D3D9 is a good API.
if (desc.Usage & D3DUSAGE_DYNAMIC)
Flags &= ~D3DLOCK_DONOTWAIT;
// We only bounds check for MANAGED.
// (TODO: Apparently this is meant to happen for DYNAMIC too but I am not sure
// how that works given it is meant to be a DIRECT access..?)
// D3D9 does not do region tracking for READONLY locks
// But let's also account for whether we get readback from ProcessVertices
// Note: the check below applies to every pool other than DEFAULT.
const bool quickRead = ((Flags & D3DLOCK_READONLY) && !pResource->GetReadLocked());
const bool boundsCheck = desc.Pool != D3DPOOL_DEFAULT && !quickRead;
if (boundsCheck) {
// We can only respect this for these cases -- otherwise R/W OOB still get copied on native
// and some stupid games depend on that.
const bool respectUserBounds = !(Flags & D3DLOCK_DISCARD) &&
SizeToLock != 0;
// If we don't respect the bounds, encompass it all in our tests/checks
// These values may be out of range and don't get clamped.
uint32_t offset = respectUserBounds ? OffsetToLock : 0;
uint32_t size = respectUserBounds ? SizeToLock : desc.Size;
// Grow the accumulated lock range so that it also covers this lock.
pResource->LockRange().Conjoin(D3D9Range(offset, offset + size));
}
Rc<DxvkBuffer> mappingBuffer = pResource->GetBuffer<D3D9_COMMON_BUFFER_TYPE_MAPPING>();
DxvkBufferSliceHandle physSlice;
if (Flags & D3DLOCK_DISCARD) {
// Allocate a new backing slice for the buffer and set
// it as the 'new' mapped slice. This assumes that the
// only way to invalidate a buffer is by mapping it.
physSlice = pResource->DiscardMapSlice();
EmitCs([
cBuffer = std::move(mappingBuffer),
cBufferSlice = physSlice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cBuffer, cBufferSlice);
});
}
else {
// NOOVERWRITE promises that they will not write in a currently used area.
// Therefore we can skip waiting for these two cases.
// We can also skip waiting if there is no dirty range overlap, if we are one of those resources.
// If we are respecting the bounds ie. (MANAGED) we can test overlap
// of our bounds, otherwise we just ignore this and go for it all the time.
const bool skipWait = (Flags & D3DLOCK_NOOVERWRITE) ||
quickRead ||
(boundsCheck && !pResource->DirtyRange().Overlaps(pResource->LockRange()));
if (!skipWait) {
// The tracking state is reset before the wait, but only for locks
// without DONOTWAIT.
if (!(Flags & D3DLOCK_DONOTWAIT)) {
pResource->SetReadLocked(false);
pResource->DirtyRange().Clear();
}
if (!WaitForResource(mappingBuffer, Flags))
return D3DERR_WASSTILLDRAWING;
}
// Use map pointer from previous map operation. This
// way we don't have to synchronize with the CS thread
// if the map mode is D3DLOCK_NOOVERWRITE.
physSlice = pResource->GetMappedSlice();
}
uint8_t* data = reinterpret_cast<uint8_t*>(physSlice.mapPtr);
// The offset/size is not clamped to or affected by the desc size.
data += OffsetToLock;
*ppbData = reinterpret_cast<void*>(data);
DWORD oldFlags = pResource->GetMapFlags();
// We need to remove the READONLY flags from the map flags
// if there was ever a non-readonly upload.
if (!(Flags & D3DLOCK_READONLY)) {
oldFlags &= ~D3DLOCK_READONLY;
// Writable locks on non-DEFAULT pool buffers flag the buffer for upload.
if (pResource->Desc()->Pool != D3DPOOL_DEFAULT)
pResource->MarkNeedsUpload();
}
// Map flags accumulate across nested locks of the same buffer.
pResource->SetMapFlags(Flags | oldFlags);
pResource->IncrementLockCount();
return D3D_OK;
}
// Queues a copy of the staging slice of a buffer into its real slice,
// then folds the lock range into the dirty range, clears the lock
// range and marks the buffer as uploaded. Always returns D3D_OK.
HRESULT D3D9DeviceEx::FlushBuffer(
        D3D9CommonBuffer*       pResource) {
  auto realSlice    = pResource->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>();
  auto stagingSlice = pResource->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_STAGING>();

  EmitCs([
    cDst = realSlice,
    cSrc = stagingSlice
  ] (DxvkContext* ctx) {
    // The copy length is taken from the source (staging) slice.
    ctx->copyBuffer(
      cDst.buffer(), cDst.offset(),
      cSrc.buffer(), cSrc.offset(),
      cSrc.length());
  });

  // Everything that was locked is now dirty.
  auto& lockedRange = pResource->LockRange();
  pResource->DirtyRange().Conjoin(lockedRange);
  lockedRange.Clear();

  pResource->MarkUploaded();
  return D3D_OK;
}
// Releases one lock on a buffer. Only once the lock count reaches zero,
// and only for buffer-mapped resources whose accumulated map flags are
// not read-only, are the map flags reset. DEFAULT pool buffers are then
// flushed to their real buffer. Always returns D3D_OK.
HRESULT D3D9DeviceEx::UnlockBuffer(
        D3D9CommonBuffer*       pResource) {
  D3D9DeviceLock lock = LockDevice();

  // Evaluated left to right; each term short-circuits the following ones.
  const bool nothingToDo =
       pResource->DecrementLockCount() != 0
    || pResource->GetMapMode() != D3D9_COMMON_BUFFER_MAP_MODE_BUFFER
    || (pResource->GetMapFlags() & D3DLOCK_READONLY);

  if (nothingToDo)
    return D3D_OK;

  pResource->SetMapFlags(0);

  if (pResource->Desc()->Pool == D3DPOOL_DEFAULT) {
    FlushImplicit(FALSE);
    FlushBuffer(pResource);
  }

  return D3D_OK;
}
// Passes a recorded command chunk on to the CS thread
// and notes that work has been dispatched.
void D3D9DeviceEx::EmitCsChunk(
        DxvkCsChunkRef&&        chunk) {
  m_csThread.dispatchChunk(std::move(chunk));

  // Set only after the chunk has been handed over.
  m_csIsBusy = true;
}
// Opportunistic flush. Unless StrongHint is set, nothing happens while
// more than MaxPendingSubmits submissions are pending. Otherwise a flush
// is issued if enough time has passed since the last one; the required
// interval grows with the number of pending submissions.
void D3D9DeviceEx::FlushImplicit(BOOL StrongHint) {
  const uint32_t pendingSubmits = m_dxvkDevice->pendingSubmissions();

  // Keep the number of submissions low while the GPU still has work.
  if (!StrongHint && pendingSubmits > MaxPendingSubmits)
    return;

  const uint32_t minIntervalUs = MinFlushIntervalUs
                               + IncFlushIntervalUs * pendingSubmits;

  const auto sinceLastFlush = dxvk::high_resolution_clock::now() - m_lastFlush;

  // Prevent flushing too often in short intervals.
  if (sinceLastFlush >= std::chrono::microseconds(minIntervalUs))
    Flush();
}
// Dispatches the chunk currently being recorded and, if the
// CS thread reports being busy, synchronizes with it.
void D3D9DeviceEx::SynchronizeCsThread() {
  D3D9DeviceLock lock = LockDevice();

  // Everything recorded before this call must
  // be dispatched before we synchronize.
  FlushCsChunk();

  const bool workerBusy = m_csThread.isBusy();

  if (workerBusy)
    m_csThread.synchronize();
}
// Configures the floating point unit of the calling thread:
// exceptions masked, round-to-nearest, and (where a precision control
// exists) 24 bit precision. Three compile-time paths exist: MSVC CRT
// functions, x87 inline assembly for GCC/MinGW on x86-family targets,
// and a warning-only fallback for everything else.
void D3D9DeviceEx::SetupFPU() {
// Should match d3d9 float behaviour.
#if defined(_MSC_VER)
// For MSVC we can use these cross arch and platform funcs to set the FPU.
// This will work on any platform, x86, x64, ARM, etc.
// Clear exceptions.
_clearfp();
// Disable exceptions
_controlfp(_MCW_EM, _MCW_EM);
#ifndef _WIN64
// Use 24 bit precision
// (only done for non-_WIN64 builds)
_controlfp(_PC_24, _MCW_PC);
#endif
// Round to nearest
_controlfp(_RC_NEAR, _MCW_RC);
#elif (defined(__GNUC__) || defined(__MINGW32__)) && (defined(__i386__) || defined(__x86_64__) || defined(__ia64))
// For GCC/MinGW we can use inline asm to set it.
// This only works for x86 and x64 processors however.
uint16_t control;
// Get current control word.
__asm__ __volatile__("fnstcw %0" : "=m" (*&control));
// Clear existing settings.
// 0xF0C0 keeps bits 6-7 and 12-15 and zeroes bits 0-5 (exception masks)
// and bits 8-11 (precision control and rounding control).
control &= 0xF0C0;
// Disable exceptions
// Use 24 bit precision
// Round to nearest
// 0x003F sets all six exception mask bits; the precision and rounding
// fields stay zero, which selects 24 bit precision and round-to-nearest.
control |= 0x003F;
// Set new control word.
__asm__ __volatile__("fldcw %0" : : "m" (*&control));
#else
Logger::warn("D3D9DeviceEx::SetupFPU: not supported on this arch.");
#endif
}
int64_t D3D9DeviceEx::DetermineInitialTextureMemory() {
auto memoryProp = m_adapter->GetDXVKAdapter()->memoryProperties();
VkDeviceSize availableTextureMemory = 0;
for (uint32_t i = 0; i < memoryProp.memoryHeapCount; i++) {
VkMemoryHeap& heap = memoryProp.memoryHeaps[i];
if (heap.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)
availableTextureMemory += memoryProp.memoryHeaps[i].size;
}
constexpr VkDeviceSize Megabytes = 1024 * 1024;
// The value returned is a 32-bit value, so we need to clamp it.
VkDeviceSize maxMemory = (VkDeviceSize(m_d3d9Options.maxAvailableMemory) * Megabytes) - 1;
availableTextureMemory = std::min(availableTextureMemory, maxMemory);
return int64_t(availableTextureMemory);
}
void D3D9DeviceEx::CreateConstantBuffers() {
DxvkBufferCreateInfo info;
info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
info.access = VK_ACCESS_UNIFORM_READ_BIT;
VkMemoryPropertyFlags memoryFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
| VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
info.stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
info.size = m_vsLayout.totalSize();
m_consts[DxsoProgramTypes::VertexShader].buffer = m_dxvkDevice->createBuffer(info, memoryFlags);
info.stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
info.size = m_psLayout.totalSize();
m_consts[DxsoProgramTypes::PixelShader].buffer = m_dxvkDevice->createBuffer(info, memoryFlags);
info.stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
info.size = caps::MaxClipPlanes * sizeof(D3D9ClipPlane);
m_vsClipPlanes = m_dxvkDevice->createBuffer(info, memoryFlags);
info.stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
info.size = sizeof(D3D9FixedFunctionVS);
m_vsFixedFunction = m_dxvkDevice->createBuffer(info, memoryFlags);
info.stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
info.size = sizeof(D3D9FixedFunctionPS);
m_psFixedFunction = m_dxvkDevice->createBuffer(info, memoryFlags);
info.stages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
info.size = sizeof(D3D9SharedPS);
m_psShared = m_dxvkDevice->createBuffer(info, memoryFlags);
info.stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
info.access = VK_ACCESS_SHADER_READ_BIT;
info.size = CanSWVP() ? sizeof(D3D9FixedFunctionVertexBlendDataSW) : sizeof(D3D9FixedFunctionVertexBlendDataHW);
m_vsVertexBlend = m_dxvkDevice->createBuffer(info, memoryFlags);
auto BindConstantBuffer = [this](
DxsoProgramType shaderStage,
Rc<DxvkBuffer> buffer,
DxsoConstantBuffers cbuffer) {
const uint32_t slotId = computeResourceSlotId(
shaderStage, DxsoBindingType::ConstantBuffer,
cbuffer);
EmitCs([
cSlotId = slotId,
cBuffer = buffer
] (DxvkContext* ctx) {
ctx->bindResourceBuffer(cSlotId,
DxvkBufferSlice(cBuffer, 0, cBuffer->info().size));
});
};
BindConstantBuffer(DxsoProgramTypes::VertexShader, m_consts[DxsoProgramTypes::VertexShader].buffer, DxsoConstantBuffers::VSConstantBuffer);
BindConstantBuffer(DxsoProgramTypes::VertexShader, m_vsClipPlanes, DxsoConstantBuffers::VSClipPlanes);
BindConstantBuffer(DxsoProgramTypes::VertexShader, m_vsFixedFunction, DxsoConstantBuffers::VSFixedFunction);
BindConstantBuffer(DxsoProgramTypes::VertexShader, m_vsVertexBlend, DxsoConstantBuffers::VSVertexBlendData);
BindConstantBuffer(DxsoProgramTypes::PixelShader, m_consts[DxsoProgramTypes::PixelShader].buffer, DxsoConstantBuffers::PSConstantBuffer);
BindConstantBuffer(DxsoProgramTypes::PixelShader, m_psFixedFunction, DxsoConstantBuffers::PSFixedFunction);
BindConstantBuffer(DxsoProgramTypes::PixelShader, m_psShared, DxsoConstantBuffers::PSShared);
m_flags.set(
D3D9DeviceFlag::DirtyClipPlanes);
}
template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
inline void D3D9DeviceEx::UploadHardwareConstantSet(void* pData, const SoftwareLayoutType& Src, const ShaderType& Shader) {
const D3D9ConstantSets& constSet = m_consts[ShaderStage];
auto* dst = reinterpret_cast<HardwareLayoutType*>(pData);
if (constSet.meta->maxConstIndexF)
std::memcpy(dst->fConsts, Src.fConsts, constSet.meta->maxConstIndexF * sizeof(Vector4));
if (constSet.meta->maxConstIndexI)
std::memcpy(dst->iConsts, Src.iConsts, constSet.meta->maxConstIndexI * sizeof(Vector4i));
if (constSet.meta->maxConstIndexB)
dst->bConsts[0] = Src.bConsts[0];
}
template <typename SoftwareLayoutType, typename ShaderType>
inline void D3D9DeviceEx::UploadSoftwareConstantSet(void* pData, const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) {
const D3D9ConstantSets& constSet = m_consts[DxsoProgramType::VertexShader];
auto dst = reinterpret_cast<uint8_t*>(pData);
if (constSet.meta->maxConstIndexF)
std::memcpy(dst + Layout.floatOffset(), Src.fConsts, constSet.meta->maxConstIndexF * sizeof(Vector4));
if (constSet.meta->maxConstIndexI)
std::memcpy(dst + Layout.intOffset(), Src.iConsts, constSet.meta->maxConstIndexI * sizeof(Vector4i));
if (constSet.meta->maxConstIndexB)
std::memcpy(dst + Layout.bitmaskOffset(), Src.bConsts, Layout.bitmaskSize());
}
template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
inline void D3D9DeviceEx::UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) {
D3D9ConstantSets& constSet = m_consts[ShaderStage];
if (!constSet.dirty)
return;
constSet.dirty = false;
DxvkBufferSliceHandle slice = constSet.buffer->allocSlice();
EmitCs([
cBuffer = constSet.buffer,
cSlice = slice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cBuffer, cSlice);
});
if constexpr (ShaderStage == DxsoProgramType::PixelShader)
UploadHardwareConstantSet<ShaderStage, HardwareLayoutType>(slice.mapPtr, Src, Shader);
else if (likely(!CanSWVP()))
UploadHardwareConstantSet<ShaderStage, HardwareLayoutType>(slice.mapPtr, Src, Shader);
else
UploadSoftwareConstantSet(slice.mapPtr, Src, Layout, Shader);
if (constSet.meta->needsConstantCopies) {
Vector4* data = reinterpret_cast<Vector4*>(slice.mapPtr);
auto& shaderConsts = GetCommonShader(Shader)->GetConstants();
for (const auto& constant : shaderConsts)
data[constant.uboIdx] = *reinterpret_cast<const Vector4*>(constant.float32);
}
}
// Uploads the constant set of the given shader stage, selecting
// the matching constant state, layout and bound shader.
template <DxsoProgramType ShaderStage>
void D3D9DeviceEx::UploadConstants() {
  constexpr bool isVertexStage = ShaderStage == DxsoProgramTypes::VertexShader;

  if constexpr (isVertexStage) {
    UploadConstantSet<ShaderStage, D3D9ShaderConstantsVSHardware>(
      m_state.vsConsts, m_vsLayout, m_state.vertexShader);
  }
  else {
    UploadConstantSet<ShaderStage, D3D9ShaderConstantsPS>(
      m_state.psConsts, m_psLayout, m_state.pixelShader);
  }
}
// Writes the current clip planes into a fresh slice of the clip plane
// buffer and makes that slice current on the CS thread. Planes whose bit
// is not set in D3DRS_CLIPPLANEENABLE are written as default-constructed
// planes. Clears the DirtyClipPlanes flag.
void D3D9DeviceEx::UpdateClipPlanes() {
  m_flags.clr(D3D9DeviceFlag::DirtyClipPlanes);

  auto freshSlice = m_vsClipPlanes->allocSlice();
  auto planes     = reinterpret_cast<D3D9ClipPlane*>(freshSlice.mapPtr);

  for (uint32_t planeIdx = 0; planeIdx < caps::MaxClipPlanes; planeIdx++) {
    if (m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1 << planeIdx))
      planes[planeIdx] = m_state.clipPlanes[planeIdx];
    else
      planes[planeIdx] = D3D9ClipPlane();
  }

  EmitCs([
    cBuffer = m_vsClipPlanes,
    cSlice  = freshSlice
  ] (DxvkContext* ctx) {
    ctx->invalidateBuffer(cBuffer, cSlice);
  });
}
// Queues an update of Length bytes of push constant data at the given
// byte Offset. The bytes at pData are copied into the queued command, so
// the caller's storage does not need to outlive this call.
template <uint32_t Offset, uint32_t Length>
void D3D9DeviceEx::UpdatePushConstant(const void* pData) {
  // Take a by-value snapshot that the lambda can own.
  std::array<uint8_t, Length> snapshot;
  std::memcpy(snapshot.data(), pData, Length);

  EmitCs([
    cBytes = snapshot
  ](DxvkContext* ctx) {
    ctx->pushConstants(Offset, Length, cBytes.data());
  });
}
// Recomputes a single render-state-derived push constant, selected at
// compile time by Item, and queues its upload at the offset of the
// matching D3D9RenderStateInfo member. Values are derived from the
// current render states (and, for the point scale factors, the viewport).
// An Item without a matching branch only logs a warning.
template <D3D9RenderStateItem Item>
void D3D9DeviceEx::UpdatePushConstant() {
auto& rs = m_state.renderStates;
if constexpr (Item == D3D9RenderStateItem::AlphaRef) {
// The integer render state is normalized by 255.
float alpha = float(rs[D3DRS_ALPHAREF]) / 255.0f;
UpdatePushConstant<offsetof(D3D9RenderStateInfo, alphaRef), sizeof(float)>(&alpha);
}
else if constexpr (Item == D3D9RenderStateItem::FogColor) {
// The D3DCOLOR render state is decoded into a four-component vector.
Vector4 color;
DecodeD3DCOLOR(D3DCOLOR(rs[D3DRS_FOGCOLOR]), color.data);
UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogColor), sizeof(D3D9RenderStateInfo::fogColor)>(&color);
}
else if constexpr (Item == D3D9RenderStateItem::FogDensity) {
// The render state holds the bit pattern of a float.
float density = bit::cast<float>(rs[D3DRS_FOGDENSITY]);
UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogDensity), sizeof(float)>(&density);
}
else if constexpr (Item == D3D9RenderStateItem::FogEnd) {
float end = bit::cast<float>(rs[D3DRS_FOGEND]);
UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogEnd), sizeof(float)>(&end);
}
else if constexpr (Item == D3D9RenderStateItem::FogScale) {
// scale = 1 / (end - start); a non-finite result (e.g. when
// end == start) is replaced with zero.
float end = bit::cast<float>(rs[D3DRS_FOGEND]);
float start = bit::cast<float>(rs[D3DRS_FOGSTART]);
float scale = 1.0f / (end - start);
if (!std::isfinite(scale))
scale = 0.0f;
UpdatePushConstant<offsetof(D3D9RenderStateInfo, fogScale), sizeof(float)>(&scale);
}
else if constexpr (Item == D3D9RenderStateItem::PointSize) {
// The point size states are pushed as their raw stored bits;
// sizeof(float) bytes are copied straight from the render state.
UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointSize), sizeof(float)>(&rs[D3DRS_POINTSIZE]);
}
else if constexpr (Item == D3D9RenderStateItem::PointSizeMin) {
UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointSizeMin), sizeof(float)>(&rs[D3DRS_POINTSIZE_MIN]);
}
else if constexpr (Item == D3D9RenderStateItem::PointSizeMax) {
UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointSizeMax), sizeof(float)>(&rs[D3DRS_POINTSIZE_MAX]);
}
else if constexpr (Item == D3D9RenderStateItem::PointScaleA) {
// All three point scale factors are divided by the squared viewport
// height. NOTE(review): the product is computed on the viewport's
// integer type before the float conversion - confirm the height
// range makes overflow and a zero height impossible.
float scale = bit::cast<float>(rs[D3DRS_POINTSCALE_A]);
scale /= float(m_state.viewport.Height * m_state.viewport.Height);
UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointScaleA), sizeof(float)>(&scale);
}
else if constexpr (Item == D3D9RenderStateItem::PointScaleB) {
float scale = bit::cast<float>(rs[D3DRS_POINTSCALE_B]);
scale /= float(m_state.viewport.Height * m_state.viewport.Height);
UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointScaleB), sizeof(float)>(&scale);
}
else if constexpr (Item == D3D9RenderStateItem::PointScaleC) {
float scale = bit::cast<float>(rs[D3DRS_POINTSCALE_C]);
scale /= float(m_state.viewport.Height * m_state.viewport.Height);
UpdatePushConstant<offsetof(D3D9RenderStateInfo, pointScaleC), sizeof(float)>(&scale);
}
else
Logger::warn("D3D9: Invalid push constant set to update.");
}
// Flushes the initializer and, if chunks were dispatched since the last
// flush or the current chunk holds commands, queues a command list flush
// on the CS thread, dispatches the current chunk and restarts the
// implicit flush timer.
void D3D9DeviceEx::Flush() {
  D3D9DeviceLock lock = LockDevice();

  m_initializer->Flush();

  const bool hasPendingWork = m_csIsBusy || !m_csChunk->empty();

  if (!hasPendingWork)
    return;

  // Add commands to flush the threaded
  // context, then flush the command list
  EmitCs([] (DxvkContext* ctx) {
    ctx->flushCommandList();
  });

  FlushCsChunk();

  // Reset flush timer used for implicit flushes
  m_lastFlush = dxvk::high_resolution_clock::now();
  m_csIsBusy  = false;
}
// Returns the sampler and render target usage masks for the current
// pixel pipeline. If a pixel shader is bound, the masks come from that
// shader. Otherwise (fixed function) a conservative default is returned.
inline D3D9ShaderMasks D3D9DeviceEx::GetShaderMasks() {
  const auto* shader = GetCommonShader(m_state.pixelShader);

  if (likely(shader != nullptr))
    return shader->GetShaderMask();

  // TODO: What fixed function textures are in use?
  // Currently we are marking all 8 of them as in use here,
  // i.e. one bit for each of stages 0 through 7. The previous
  // literal only had seven bits set, which left stage 7 out
  // of hazard tracking.
  // The RT output is always 0 for fixed function.
  return D3D9ShaderMasks{ 0b11111111, 0b1 };
}
inline void D3D9DeviceEx::UpdateActiveRTs(uint32_t index) {
const uint32_t bit = 1 << index;
m_activeRTs &= ~bit;
if (m_state.renderTargets[index] != nullptr &&
m_state.renderTargets[index]->GetBaseTexture() != nullptr &&
m_state.renderStates[ColorWriteIndex(index)])
m_activeRTs |= bit;
UpdateActiveHazards();
}
// Recomputes the bit that records whether the texture bound to the
// given texture slot is a render target, then refreshes the hazard mask.
inline void D3D9DeviceEx::UpdateActiveRTTextures(uint32_t index) {
  const uint32_t slotBit = 1 << index;

  auto texture = GetCommonTexture(m_state.textures[index]);

  const bool isRenderTarget = texture != nullptr && texture->IsRenderTarget();

  if (isRenderTarget)
    m_activeRTTextures |= slotBit;
  else
    m_activeRTTextures &= ~slotBit;

  UpdateActiveHazards();
}
inline void D3D9DeviceEx::UpdateActiveHazards() {
auto masks = GetShaderMasks();
masks.rtMask &= m_activeRTs;
masks.samplerMask &= m_activeRTTextures;
m_activeHazards = 0;
for (uint32_t rt = masks.rtMask; rt; rt &= rt - 1) {
for (uint32_t sampler = masks.samplerMask; sampler; sampler &= sampler - 1) {
D3D9Surface* rtSurf = m_state.renderTargets[bit::tzcnt(rt)].ptr();
IDirect3DBaseTexture9* rtBase = rtSurf->GetBaseTexture();
IDirect3DBaseTexture9* texBase = m_state.textures[bit::tzcnt(sampler)];
// HACK: Don't mark for hazards if we aren't rendering to mip 0!
// Some games use screenspace passes like this for blurring
// Sampling from mip 0 (texture) -> mip 1 (rt)
// and we'd trigger the hazard path otherwise which is unnecessary,
// and would shove us into GENERAL and emitting readback barriers.
if (likely(rtSurf->GetMipLevel() != 0 || rtBase != texBase))
continue;
m_activeHazards |= 1 << bit::tzcnt(rt);
}
}
}
void D3D9DeviceEx::MarkRenderHazards() {
for (uint32_t rt = m_activeHazards; rt; rt &= rt - 1) {
// Guaranteed to not be nullptr...
auto tex = m_state.renderTargets[bit::tzcnt(rt)]->GetCommonTexture();
if (unlikely(!tex->MarkHazardous())) {
TransitionImage(tex, VK_IMAGE_LAYOUT_GENERAL);
m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
}
}
}
// Updates the PointMode specialization constant.
// With Points == false the mode is unconditionally reset to 0.
// With Points == true the mode is a bit field: bit 0 is set for point
// scaling (only without a programmable vertex shader), bit 1 for point
// sprites. Dirty point scale push constants are refreshed first, and the
// spec constant is only re-emitted if the mode changed.
template <bool Points>
void D3D9DeviceEx::UpdatePointMode() {
  if constexpr (Points) {
    auto& rs = m_state.renderStates;

    const bool scaleRequested = rs[D3DRS_POINTSCALEENABLE];
    const bool spriteEnabled  = rs[D3DRS_POINTSPRITEENABLE];
    const bool scaleEnabled   = scaleRequested && !UseProgrammableVS();

    const uint32_t pointMode = (scaleEnabled  ? 1u : 0u)
                             | (spriteEnabled ? 2u : 0u);

    // The scale factors are refreshed whenever the render state is
    // enabled, independent of the vertex shader check above.
    if (scaleRequested && m_flags.test(D3D9DeviceFlag::DirtyPointScale)) {
      m_flags.clr(D3D9DeviceFlag::DirtyPointScale);

      UpdatePushConstant<D3D9RenderStateItem::PointScaleA>();
      UpdatePushConstant<D3D9RenderStateItem::PointScaleB>();
      UpdatePushConstant<D3D9RenderStateItem::PointScaleC>();
    }

    if (unlikely(pointMode != m_lastPointMode)) {
      EmitCs([cPointMode = pointMode] (DxvkContext* ctx) {
        ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PointMode, cPointMode);
      });

      m_lastPointMode = pointMode;
    }
  }
  else {
    m_lastPointMode = 0;

    EmitCs([] (DxvkContext* ctx) {
      ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PointMode, 0);
    });
  }
}
void D3D9DeviceEx::UpdateFog() {
auto& rs = m_state.renderStates;
bool fogEnabled = rs[D3DRS_FOGENABLE];
bool pixelFog = rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE && fogEnabled;
bool vertexFog = rs[D3DRS_FOGVERTEXMODE] != D3DFOG_NONE && fogEnabled && !pixelFog;
auto UpdateFogConstants = [&](D3DFOGMODE FogMode) {
if (m_flags.test(D3D9DeviceFlag::DirtyFogColor)) {
m_flags.clr(D3D9DeviceFlag::DirtyFogColor);
UpdatePushConstant<D3D9RenderStateItem::FogColor>();
}
if (FogMode == D3DFOG_LINEAR) {
if (m_flags.test(D3D9DeviceFlag::DirtyFogScale)) {
m_flags.clr(D3D9DeviceFlag::DirtyFogScale);
UpdatePushConstant<D3D9RenderStateItem::FogScale>();
}
if (m_flags.test(D3D9DeviceFlag::DirtyFogEnd)) {
m_flags.clr(D3D9DeviceFlag::DirtyFogEnd);
UpdatePushConstant<D3D9RenderStateItem::FogEnd>();
}
}
else if (FogMode == D3DFOG_EXP || FogMode == D3DFOG_EXP2) {
if (m_flags.test(D3D9DeviceFlag::DirtyFogDensity)) {
m_flags.clr(D3D9DeviceFlag::DirtyFogDensity);
UpdatePushConstant<D3D9RenderStateItem::FogDensity>();
}
}
};
if (vertexFog) {
D3DFOGMODE mode = D3DFOGMODE(rs[D3DRS_FOGVERTEXMODE]);
UpdateFogConstants(mode);
if (m_flags.test(D3D9DeviceFlag::DirtyFogState)) {
m_flags.clr(D3D9DeviceFlag::DirtyFogState);
EmitCs([cMode = mode] (DxvkContext* ctx) {
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled, true);
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, cMode);
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, D3DFOG_NONE);
});
}
}
else if (pixelFog) {
D3DFOGMODE mode = D3DFOGMODE(rs[D3DRS_FOGTABLEMODE]);
UpdateFogConstants(mode);
if (m_flags.test(D3D9DeviceFlag::DirtyFogState)) {
m_flags.clr(D3D9DeviceFlag::DirtyFogState);
EmitCs([cMode = mode] (DxvkContext* ctx) {
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled, true);
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, D3DFOG_NONE);
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, cMode);
});
}
}
else {
if (fogEnabled)
UpdateFogConstants(D3DFOG_NONE);
if (m_flags.test(D3D9DeviceFlag::DirtyFogState)) {
m_flags.clr(D3D9DeviceFlag::DirtyFogState);
EmitCs([cEnabled = fogEnabled] (DxvkContext* ctx) {
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::FogEnabled, cEnabled);
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::VertexFogMode, D3DFOG_NONE);
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::PixelFogMode, D3DFOG_NONE);
});
}
}
}
void D3D9DeviceEx::BindFramebuffer() {
m_flags.clr(D3D9DeviceFlag::DirtyFramebuffer);
DxvkRenderTargets attachments;
bool srgb = m_state.renderStates[D3DRS_SRGBWRITEENABLE];
// D3D9 doesn't have the concept of a framebuffer object,
// so we'll just create a new one every time the render
// target bindings are updated. Set up the attachments.
VkSampleCountFlagBits sampleCount = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM;
for (UINT i = 0; i < m_state.renderTargets.size(); i++) {
if (m_state.renderTargets[i] != nullptr && !m_state.renderTargets[i]->IsNull()) {
const DxvkImageCreateInfo& rtImageInfo = m_state.renderTargets[i]->GetCommonTexture()->GetImage()->info();
if (likely(sampleCount == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM))
sampleCount = rtImageInfo.sampleCount;
else if (unlikely(sampleCount != rtImageInfo.sampleCount))
continue;
attachments.color[i] = {
m_state.renderTargets[i]->GetRenderTargetView(srgb),
m_state.renderTargets[i]->GetRenderTargetLayout() };
}
}
if (m_state.depthStencil != nullptr) {
const DxvkImageCreateInfo& dsImageInfo = m_state.depthStencil->GetCommonTexture()->GetImage()->info();
if (likely(sampleCount == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM || sampleCount == dsImageInfo.sampleCount)) {
attachments.depth = {
m_state.depthStencil->GetDepthStencilView(),
m_state.depthStencil->GetDepthStencilLayout() };
}
}
// Create and bind the framebuffer object to the context
EmitCs([
cAttachments = std::move(attachments)
] (DxvkContext* ctx) {
ctx->bindRenderTargets(cAttachments);
});
}
void D3D9DeviceEx::BindViewportAndScissor() {
m_flags.clr(D3D9DeviceFlag::DirtyViewportScissor);
VkViewport viewport;
VkRect2D scissor;
// D3D9's coordinate system has its origin in the bottom left,
// but the viewport coordinates are aligned to the top-left
// corner so we can get away with flipping the viewport.
const D3DVIEWPORT9& vp = m_state.viewport;
// Correctness Factor for 1/2 texel offset
float cf = 0.5f;
// HACK: UE3 bug re. tonemapper + shadow sampling being red:-
// We need to bias this, except when it's
// NOT powers of two in order to make
// imprecision biased towards infinity.
if ((vp.Width & (vp.Width - 1)) == 0
&& (vp.Height & (vp.Height - 1)) == 0)
cf -= 1.0f / 128.0f;
viewport = VkViewport{
float(vp.X) + cf, float(vp.Height + vp.Y) + cf,
float(vp.Width), -float(vp.Height),
vp.MinZ, vp.MaxZ,
};
// Scissor rectangles. Vulkan does not provide an easy way
// to disable the scissor test, so we'll have to set scissor
// rects that are at least as large as the framebuffer.
bool enableScissorTest = m_state.renderStates[D3DRS_SCISSORTESTENABLE];
if (enableScissorTest) {
RECT sr = m_state.scissorRect;
VkOffset2D srPosA;
srPosA.x = std::max<int32_t>(0, sr.left);
srPosA.y = std::max<int32_t>(0, sr.top);
VkOffset2D srPosB;
srPosB.x = std::max<int32_t>(srPosA.x, sr.right);
srPosB.y = std::max<int32_t>(srPosA.y, sr.bottom);
VkExtent2D srSize;
srSize.width = uint32_t(srPosB.x - srPosA.x);
srSize.height = uint32_t(srPosB.y - srPosA.y);
scissor = VkRect2D{ srPosA, srSize };
}
else {
scissor = VkRect2D{
VkOffset2D { int32_t(vp.X), int32_t(vp.Y) },
VkExtent2D { vp.Width, vp.Height }};
}
EmitCs([
cViewport = viewport,
cScissor = scissor
] (DxvkContext* ctx) {
ctx->setViewports(
1,
&cViewport,
&cScissor);
});
}
void D3D9DeviceEx::BindMultiSampleState() {
m_flags.clr(D3D9DeviceFlag::DirtyMultiSampleState);
DxvkMultisampleState msState;
msState.sampleMask = m_flags.test(D3D9DeviceFlag::ValidSampleMask)
? m_state.renderStates[D3DRS_MULTISAMPLEMASK]
: 0xffffffff;
msState.enableAlphaToCoverage = IsAlphaToCoverageEnabled();
EmitCs([
cState = msState
] (DxvkContext* ctx) {
ctx->setMultisampleState(cState);
});
}
void D3D9DeviceEx::BindBlendState() {
m_flags.clr(D3D9DeviceFlag::DirtyBlendState);
auto& state = m_state.renderStates;
bool separateAlpha = state[D3DRS_SEPARATEALPHABLENDENABLE];
DxvkBlendMode mode;
mode.enableBlending = state[D3DRS_ALPHABLENDENABLE] != FALSE;
D3D9BlendState color, alpha;
color.Src = D3DBLEND(state[D3DRS_SRCBLEND]);
color.Dst = D3DBLEND(state[D3DRS_DESTBLEND]);
color.Op = D3DBLENDOP(state[D3DRS_BLENDOP]);
FixupBlendState(color);
if (separateAlpha) {
alpha.Src = D3DBLEND(state[D3DRS_SRCBLENDALPHA]);
alpha.Dst = D3DBLEND(state[D3DRS_DESTBLENDALPHA]);
alpha.Op = D3DBLENDOP(state[D3DRS_BLENDOPALPHA]);
FixupBlendState(alpha);
}
else
alpha = color;
mode.colorSrcFactor = DecodeBlendFactor(color.Src, false);
mode.colorDstFactor = DecodeBlendFactor(color.Dst, false);
mode.colorBlendOp = DecodeBlendOp (color.Op);
mode.alphaSrcFactor = DecodeBlendFactor(alpha.Src, true);
mode.alphaDstFactor = DecodeBlendFactor(alpha.Dst, true);
mode.alphaBlendOp = DecodeBlendOp (alpha.Op);
mode.writeMask = state[ColorWriteIndex(0)];
std::array<VkColorComponentFlags, 3> extraWriteMasks;
for (uint32_t i = 0; i < 3; i++)
extraWriteMasks[i] = state[ColorWriteIndex(i + 1)];
EmitCs([
cMode = mode,
cWriteMasks = extraWriteMasks,
cAlphaMasks = m_alphaSwizzleRTs
](DxvkContext* ctx) {
for (uint32_t i = 0; i < 4; i++) {
DxvkBlendMode mode = cMode;
if (i != 0)
mode.writeMask = cWriteMasks[i - 1];
const bool alphaSwizzle = cAlphaMasks & (1 << i);
auto NormalizeFactor = [alphaSwizzle](VkBlendFactor Factor) {
if (alphaSwizzle) {
if (Factor == VK_BLEND_FACTOR_DST_ALPHA)
return VK_BLEND_FACTOR_ONE;
else if (Factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA)
return VK_BLEND_FACTOR_ZERO;
}
return Factor;
};
mode.colorSrcFactor = NormalizeFactor(mode.colorSrcFactor);
mode.colorDstFactor = NormalizeFactor(mode.colorDstFactor);
mode.alphaSrcFactor = NormalizeFactor(mode.alphaSrcFactor);
mode.alphaDstFactor = NormalizeFactor(mode.alphaDstFactor);
ctx->setBlendMode(i, mode);
}
});
}
void D3D9DeviceEx::BindBlendFactor() {
DxvkBlendConstants blendConstants;
DecodeD3DCOLOR(
D3DCOLOR(m_state.renderStates[D3DRS_BLENDFACTOR]),
reinterpret_cast<float*>(&blendConstants));
EmitCs([
cBlendConstants = blendConstants
](DxvkContext* ctx) {
ctx->setBlendConstants(cBlendConstants);
});
}
void D3D9DeviceEx::BindDepthStencilState() {
m_flags.clr(D3D9DeviceFlag::DirtyDepthStencilState);
auto& rs = m_state.renderStates;
bool stencil = rs[D3DRS_STENCILENABLE];
bool twoSidedStencil = stencil && rs[D3DRS_TWOSIDEDSTENCILMODE];
DxvkDepthStencilState state;
state.enableDepthTest = rs[D3DRS_ZENABLE] != FALSE;
state.enableDepthWrite = rs[D3DRS_ZWRITEENABLE] != FALSE;
state.enableStencilTest = stencil;
state.depthCompareOp = DecodeCompareOp(D3DCMPFUNC(rs[D3DRS_ZFUNC]));
if (stencil) {
state.stencilOpFront.failOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILFAIL]));
state.stencilOpFront.passOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILPASS]));
state.stencilOpFront.depthFailOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_STENCILZFAIL]));
state.stencilOpFront.compareOp = DecodeCompareOp(D3DCMPFUNC (rs[D3DRS_STENCILFUNC]));
state.stencilOpFront.compareMask = uint32_t(rs[D3DRS_STENCILMASK]);
state.stencilOpFront.writeMask = uint32_t(rs[D3DRS_STENCILWRITEMASK]);
state.stencilOpFront.reference = 0;
}
else
state.stencilOpFront = VkStencilOpState();
if (twoSidedStencil) {
state.stencilOpBack.failOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILFAIL]));
state.stencilOpBack.passOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILPASS]));
state.stencilOpBack.depthFailOp = DecodeStencilOp(D3DSTENCILOP(rs[D3DRS_CCW_STENCILZFAIL]));
state.stencilOpBack.compareOp = DecodeCompareOp(D3DCMPFUNC (rs[D3DRS_CCW_STENCILFUNC]));
state.stencilOpBack.compareMask = state.stencilOpFront.compareMask;
state.stencilOpBack.writeMask = state.stencilOpFront.writeMask;
state.stencilOpBack.reference = 0;
}
else
state.stencilOpBack = state.stencilOpFront;
EmitCs([
cState = state
](DxvkContext* ctx) {
ctx->setDepthStencilState(cState);
});
}
void D3D9DeviceEx::BindRasterizerState() {
m_flags.clr(D3D9DeviceFlag::DirtyRasterizerState);
auto& rs = m_state.renderStates;
DxvkRasterizerState state;
state.cullMode = DecodeCullMode(D3DCULL(rs[D3DRS_CULLMODE]));
state.depthBiasEnable = IsDepthBiasEnabled();
state.depthClipEnable = true;
state.frontFace = VK_FRONT_FACE_CLOCKWISE;
state.polygonMode = DecodeFillMode(D3DFILLMODE(rs[D3DRS_FILLMODE]));
state.sampleCount = 0;
EmitCs([
cState = state
](DxvkContext* ctx) {
ctx->setRasterizerState(cState);
});
}
void D3D9DeviceEx::BindDepthBias() {
m_flags.clr(D3D9DeviceFlag::DirtyDepthBias);
auto& rs = m_state.renderStates;
float depthBias = bit::cast<float>(rs[D3DRS_DEPTHBIAS]) * m_depthBiasScale;
float slopeScaledDepthBias = bit::cast<float>(rs[D3DRS_SLOPESCALEDEPTHBIAS]);
DxvkDepthBias biases;
biases.depthBiasConstant = depthBias;
biases.depthBiasSlope = slopeScaledDepthBias;
biases.depthBiasClamp = 0.0f;
EmitCs([
cBiases = biases
](DxvkContext* ctx) {
ctx->setDepthBias(cBiases);
});
}
void D3D9DeviceEx::BindAlphaTestState() {
m_flags.clr(D3D9DeviceFlag::DirtyAlphaTestState);
auto& rs = m_state.renderStates;
VkCompareOp alphaOp = IsAlphaTestEnabled()
? DecodeCompareOp(D3DCMPFUNC(rs[D3DRS_ALPHAFUNC]))
: VK_COMPARE_OP_ALWAYS;
EmitCs([cAlphaOp = alphaOp] (DxvkContext* ctx) {
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::AlphaTestEnable, cAlphaOp != VK_COMPARE_OP_ALWAYS);
ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::AlphaCompareOp, cAlphaOp);
});
}
/**
 * \brief Sends the stencil reference value to the context
 *
 * The value is taken from the D3DRS_STENCILREF render state.
 */
void D3D9DeviceEx::BindDepthStencilRefrence() {
  const uint32_t stencilRef = uint32_t(m_state.renderStates[D3DRS_STENCILREF]);

  EmitCs([cStencilRef = stencilRef] (DxvkContext* ctx) {
    ctx->setStencilReference(cStencilRef);
  });
}
// Binds the sampler objects for one state sampler.
//
// A D3D9SamplerKey is built from the sampler states of `Sampler`,
// optionally overridden by the samplerAnisotropy option, and then
// normalized. The lambda passed to EmitCs looks the key up in
// m_samplers and binds the cached color/depth pair; on a miss it
// creates both samplers, caches them and binds them.
//
// Sampler: index into m_state.samplerStates.
void D3D9DeviceEx::BindSampler(DWORD Sampler) {
auto& state = m_state.samplerStates[Sampler];
D3D9SamplerKey key;
key.AddressU = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSU]);
key.AddressV = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSV]);
key.AddressW = D3DTEXTUREADDRESS(state[D3DSAMP_ADDRESSW]);
key.MagFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MAGFILTER]);
key.MinFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MINFILTER]);
key.MipFilter = D3DTEXTUREFILTERTYPE(state[D3DSAMP_MIPFILTER]);
key.MaxAnisotropy = state[D3DSAMP_MAXANISOTROPY];
// The LOD bias state holds the bit pattern of a float.
key.MipmapLodBias = bit::cast<float>(state[D3DSAMP_MIPMAPLODBIAS]);
key.MaxMipLevel = state[D3DSAMP_MAXMIPLEVEL];
DecodeD3DCOLOR(D3DCOLOR(state[D3DSAMP_BORDERCOLOR]), key.BorderColor);
// Any option value other than -1 overrides the application's
// anisotropy: linear min/mag filters are promoted to anisotropic
// and the maximum anisotropy is replaced by the option value.
if (m_d3d9Options.samplerAnisotropy != -1) {
if (key.MagFilter == D3DTEXF_LINEAR)
key.MagFilter = D3DTEXF_ANISOTROPIC;
if (key.MinFilter == D3DTEXF_LINEAR)
key.MinFilter = D3DTEXF_ANISOTROPIC;
key.MaxAnisotropy = m_d3d9Options.samplerAnisotropy;
}
NormalizeSamplerKey(key);
// Each state sampler has two resource slots: one for
// the color image and one for the depth image binding.
auto samplerInfo = RemapStateSamplerShader(Sampler);
const uint32_t colorSlot = computeResourceSlotId(
samplerInfo.first, DxsoBindingType::ColorImage,
samplerInfo.second);
const uint32_t depthSlot = computeResourceSlotId(
samplerInfo.first, DxsoBindingType::DepthImage,
samplerInfo.second);
// `this` is captured: m_samplers, m_samplerCount and
// m_dxvkDevice are all accessed from inside the lambda.
EmitCs([this,
cColorSlot = colorSlot,
cDepthSlot = depthSlot,
cKey = key
] (DxvkContext* ctx) {
// Fast path: a sampler pair for this key already exists.
auto pair = m_samplers.find(cKey);
if (pair != m_samplers.end()) {
ctx->bindResourceSampler(cColorSlot, pair->second.color);
ctx->bindResourceSampler(cDepthSlot, pair->second.depth);
return;
}
auto mipFilter = DecodeMipFilter(cKey.MipFilter);
DxvkSamplerCreateInfo colorInfo;
colorInfo.addressModeU = DecodeAddressMode(cKey.AddressU);
colorInfo.addressModeV = DecodeAddressMode(cKey.AddressV);
colorInfo.addressModeW = DecodeAddressMode(cKey.AddressW);
colorInfo.compareToDepth = VK_FALSE;
colorInfo.compareOp = VK_COMPARE_OP_NEVER;
colorInfo.magFilter = DecodeFilter(cKey.MagFilter);
colorInfo.minFilter = DecodeFilter(cKey.MinFilter);
colorInfo.mipmapMode = mipFilter.MipFilter;
colorInfo.maxAnisotropy = float(cKey.MaxAnisotropy);
colorInfo.useAnisotropy = IsAnisotropic(cKey.MinFilter)
|| IsAnisotropic(cKey.MagFilter);
colorInfo.mipmapLodBias = cKey.MipmapLodBias;
// With mip mapping disabled, the LOD range collapses to [0, 0].
colorInfo.mipmapLodMin = mipFilter.MipsEnabled ? float(cKey.MaxMipLevel) : 0;
colorInfo.mipmapLodMax = mipFilter.MipsEnabled ? FLT_MAX : 0;
colorInfo.usePixelCoord = VK_FALSE;
for (uint32_t i = 0; i < 4; i++)
colorInfo.borderColor.float32[i] = cKey.BorderColor[i];
// HACK: Let's get OPAQUE_WHITE border color over
// TRANSPARENT_BLACK if the border RGB is white.
if (colorInfo.borderColor.float32[0] == 1.0f
&& colorInfo.borderColor.float32[1] == 1.0f
&& colorInfo.borderColor.float32[2] == 1.0f) {
// Then set the alpha to 1.
colorInfo.borderColor.float32[3] = 1.0f;
}
// The depth sampler is the color sampler with depth comparison
// (LESS_OR_EQUAL) enabled and linear min/mag filtering forced.
DxvkSamplerCreateInfo depthInfo = colorInfo;
depthInfo.compareToDepth = VK_TRUE;
depthInfo.compareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
depthInfo.magFilter = VK_FILTER_LINEAR;
depthInfo.minFilter = VK_FILTER_LINEAR;
// If a DxvkError is thrown, the error is logged and no
// binding call after the throwing statement is executed.
try {
D3D9SamplerPair pair;
pair.color = m_dxvkDevice->createSampler(colorInfo);
pair.depth = m_dxvkDevice->createSampler(depthInfo);
m_samplerCount++;
m_samplers.insert(std::make_pair(cKey, pair));
ctx->bindResourceSampler(cColorSlot, pair.color);
ctx->bindResourceSampler(cDepthSlot, pair.depth);
}
catch (const DxvkError& e) {
Logger::err(e.message());
}
});
}
/**
 * \brief Binds the texture of one state sampler
 *
 * Updates the two sampler type bits of pixel shader samplers
 * and binds the texture's sample view to either the color or
 * the depth slot, depending on whether the texture is a shadow
 * texture. The other slot, or both if no texture is set, is
 * bound to a null view.
 *
 * \param [in] StateSampler State sampler index
 */
void D3D9DeviceEx::BindTexture(DWORD StateSampler) {
  const auto shaderSampler = RemapStateSamplerShader(StateSampler);

  const uint32_t colorSlot = computeResourceSlotId(shaderSampler.first,
    DxsoBindingType::ColorImage, uint32_t(shaderSampler.second));
  const uint32_t depthSlot = computeResourceSlotId(shaderSampler.first,
    DxsoBindingType::DepthImage, uint32_t(shaderSampler.second));

  const bool srgb =
    m_state.samplerStates[StateSampler][D3DSAMP_SRGBTEXTURE];

  D3D9CommonTexture* texture =
    GetCommonTexture(m_state.textures[StateSampler]);

  // The first 16 state samplers each own two bits in the
  // sampler type bitfield. An unbound sampler is stored as 0.
  if (likely(StateSampler < 16)) {
    const uint32_t shift = StateSampler * 2;

    uint32_t typeIndex = 0;

    if (texture != nullptr)
      typeIndex = uint32_t(texture->GetType() - D3DRTYPE_TEXTURE);

    m_samplerTypeBitfield = (m_samplerTypeBitfield & ~(0b11u << shift))
                          | (typeIndex << shift);
  }

  if (texture != nullptr) {
    EmitCs([
      cColorSlot = colorSlot,
      cDepthSlot = depthSlot,
      cIsShadow  = texture->IsShadow(),
      cView      = texture->GetSampleView().Pick(srgb)
    ](DxvkContext* ctx) {
      // Exactly one of the two slots receives the view.
      if (cIsShadow) {
        ctx->bindResourceView(cColorSlot, nullptr, nullptr);
        ctx->bindResourceView(cDepthSlot, cView,   nullptr);
      } else {
        ctx->bindResourceView(cColorSlot, cView,   nullptr);
        ctx->bindResourceView(cDepthSlot, nullptr, nullptr);
      }
    });
  } else {
    // Nothing bound: clear both slots.
    EmitCs([
      cColorSlot = colorSlot,
      cDepthSlot = depthSlot
    ](DxvkContext* ctx) {
      ctx->bindResourceView(cColorSlot, nullptr, nullptr);
      ctx->bindResourceView(cDepthSlot, nullptr, nullptr);
    });
  }
}
/**
 * \brief Re-binds every sampler marked dirty
 *
 * Walks the set bits of m_dirtySamplerStates from the lowest
 * to the highest and clears the mask afterwards.
 */
void D3D9DeviceEx::UndirtySamplers() {
  uint32_t pending = m_dirtySamplerStates;

  while (pending != 0) {
    BindSampler(bit::tzcnt(pending));

    // Drop the lowest set bit
    pending &= pending - 1;
  }

  m_dirtySamplerStates = 0;
}
/**
 * \brief Marks all sampler states as dirty
 *
 * Sets the low 21 bits of the dirty sampler mask.
 */
void D3D9DeviceEx::MarkSamplersDirty() {
  constexpr uint32_t AllSamplersMask = (1u << 21) - 1u; // 0x001fffff

  m_dirtySamplerStates = AllSamplersMask;
}
/**
 * \brief Computes vertex and instance count for a draw
 *
 * The given instance count is only used if at least one
 * stream is both instanced and in use; otherwise a single
 * instance is drawn.
 *
 * \param [in] PrimitiveType Primitive type
 * \param [in] PrimitiveCount Number of primitives
 * \param [in] InstanceCount Requested instance count
 * \returns Draw info
 */
D3D9DrawInfo D3D9DeviceEx::GenerateDrawInfo(
        D3DPRIMITIVETYPE PrimitiveType,
        UINT             PrimitiveCount,
        UINT             InstanceCount) {
  D3D9DrawInfo info;
  info.vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount);

  const bool hasInstancedStream =
    (m_iaState.streamsInstanced & m_iaState.streamsUsed) != 0;

  if (hasInstancedStream)
    info.instanceCount = InstanceCount;
  else
    info.instanceCount = 1u;

  return info;
}
/**
 * \brief Queries the instance count from stream 0
 *
 * Uses the low 23 bits of the stream 0 frequency
 * value and never returns less than one.
 */
uint32_t D3D9DeviceEx::GetInstanceCount() const {
  const uint32_t frequency = m_state.streamFreq[0] & 0x7FFFFFu;

  return frequency != 0u ? frequency : 1u;
}
// Brings the backend state up to date before a draw is recorded.
//
// In order: handles active render target hazards, flushes vertex and
// index buffers that report NeedsUpload(), calls UpdateFog(), re-binds
// every state group whose dirty flag is set, updates the point mode,
// restores stream 0 / index bindings, selects programmable or
// fixed-function shaders, and finally refreshes the shared pixel
// shader data and the depth bounds.
//
// PrimitiveType: primitive type of the upcoming draw. Only compared
//                against D3DPT_POINTLIST here.
// up:            when true, the UpDirtiedVertices / UpDirtiedIndices
//                re-binding below is skipped.
//                NOTE(review): presumably set by the Draw*UP paths - confirm.
void D3D9DeviceEx::PrepareDraw(D3DPRIMITIVETYPE PrimitiveType, bool up) {
// Active hazards need a readback barrier before drawing.
if (unlikely(m_activeHazards != 0)) {
EmitCs([](DxvkContext* ctx) {
ctx->emitRenderTargetReadbackBarrier();
});
if (m_d3d9Options.generalHazards)
MarkRenderHazards();
}
// Upload pending data of all bound vertex buffers...
for (uint32_t i = 0; i < caps::MaxStreams; i++) {
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
if (vbo != nullptr && vbo->NeedsUpload())
FlushBuffer(vbo);
}
// ...and of the index buffer.
auto* ibo = GetCommonBuffer(m_state.indices);
if (ibo != nullptr && ibo->NeedsUpload())
FlushBuffer(ibo);
UpdateFog();
// Re-bind every state group that has been marked dirty.
if (m_flags.test(D3D9DeviceFlag::DirtyFramebuffer))
BindFramebuffer();
if (m_flags.test(D3D9DeviceFlag::DirtyViewportScissor))
BindViewportAndScissor();
if (m_dirtySamplerStates)
UndirtySamplers();
if (m_flags.test(D3D9DeviceFlag::DirtyBlendState))
BindBlendState();
if (m_flags.test(D3D9DeviceFlag::DirtyDepthStencilState))
BindDepthStencilState();
if (m_flags.test(D3D9DeviceFlag::DirtyRasterizerState))
BindRasterizerState();
if (m_flags.test(D3D9DeviceFlag::DirtyDepthBias))
BindDepthBias();
if (m_flags.test(D3D9DeviceFlag::DirtyMultiSampleState))
BindMultiSampleState();
if (m_flags.test(D3D9DeviceFlag::DirtyAlphaTestState))
BindAlphaTestState();
if (m_flags.test(D3D9DeviceFlag::DirtyClipPlanes))
UpdateClipPlanes();
// Point lists always update the point mode. Other primitive
// types only do so while m_lastPointMode is non-zero.
if (PrimitiveType == D3DPT_POINTLIST)
UpdatePointMode<true>();
else if (m_lastPointMode != 0)
UpdatePointMode<false>();
// The UpDirtied* flags are only acted on (and cleared) when `up`
// is false: stream 0 and the index buffer are then re-bound from
// the device state.
if (!up && m_flags.test(D3D9DeviceFlag::UpDirtiedVertices)) {
m_flags.clr(D3D9DeviceFlag::UpDirtiedVertices);
if (m_state.vertexBuffers[0].vertexBuffer != nullptr)
BindVertexBuffer(0,
m_state.vertexBuffers[0].vertexBuffer.ptr(),
m_state.vertexBuffers[0].offset,
m_state.vertexBuffers[0].stride);
}
if (!up && m_flags.test(D3D9DeviceFlag::UpDirtiedIndices)) {
m_flags.clr(D3D9DeviceFlag::UpDirtiedIndices);
BindIndices();
}
// Vertex stage: programmable shader or fixed function.
if (likely(UseProgrammableVS())) {
if (unlikely(m_flags.test(D3D9DeviceFlag::DirtyProgVertexShader))) {
// Binding a vertex shader also marks the input layout
// dirty, so it is rebuilt further below.
m_flags.set(D3D9DeviceFlag::DirtyInputLayout);
BindShader<DxsoProgramType::VertexShader>(
GetCommonShader(m_state.vertexShader),
GetVertexShaderPermutation());
}
UploadConstants<DxsoProgramTypes::VertexShader>();
}
else
UpdateFixedFunctionVS();
// Must run after the vertex stage handling above,
// which may set DirtyInputLayout.
if (m_flags.test(D3D9DeviceFlag::DirtyInputLayout))
BindInputLayout();
// Only touches a spec constant when the value differs from
// the one that was applied last.
auto UpdateSamplerTypes = [&](uint32_t types, uint32_t projections) {
if (m_lastSamplerTypeBitfield != types)
UpdateSamplerSpecConsant(types);
if (m_lastProjectionBitfield != projections)
UpdateProjectionSpecConstant(projections);
};
// Pixel stage: programmable shader or fixed function.
if (likely(UseProgrammablePS())) {
UploadConstants<DxsoProgramTypes::PixelShader>();
// Shaders with major version >= 2 get no projection bits, and
// sampler type bits only if forceSamplerTypeSpecConstants is set.
if (GetCommonShader(m_state.pixelShader)->GetInfo().majorVersion() >= 2)
UpdateSamplerTypes(m_d3d9Options.forceSamplerTypeSpecConstants ? m_samplerTypeBitfield : 0u, 0u);
else
UpdateSamplerTypes(m_samplerTypeBitfield, m_projectionBitfield); // For implicit samplers...
}
else {
UpdateSamplerTypes(0u, 0u);
UpdateFixedFunctionPS();
}
// Per-stage constants and bump environment data shared by pixel shaders.
if (m_flags.test(D3D9DeviceFlag::DirtySharedPixelShaderData)) {
m_flags.clr(D3D9DeviceFlag::DirtySharedPixelShaderData);
// A fresh slice is allocated, handed to the context via
// invalidateBuffer, and then filled through its mapped pointer.
DxvkBufferSliceHandle slice = m_psShared->allocSlice();
EmitCs([
cBuffer = m_psShared,
cSlice = slice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cBuffer, cSlice);
});
D3D9SharedPS* data = reinterpret_cast<D3D9SharedPS*>(slice.mapPtr);
for (uint32_t i = 0; i < caps::TextureStageCount; i++) {
DecodeD3DCOLOR(D3DCOLOR(m_state.textureStages[i][D3DTSS_CONSTANT]), data->Stages[i].Constant);
// Flip major-ness so we can get away with a nice easy
// dot in the shader without complex access
data->Stages[i].BumpEnvMat[0][0] = bit::cast<float>(m_state.textureStages[i][D3DTSS_BUMPENVMAT00]);
data->Stages[i].BumpEnvMat[1][0] = bit::cast<float>(m_state.textureStages[i][D3DTSS_BUMPENVMAT01]);
data->Stages[i].BumpEnvMat[0][1] = bit::cast<float>(m_state.textureStages[i][D3DTSS_BUMPENVMAT10]);
data->Stages[i].BumpEnvMat[1][1] = bit::cast<float>(m_state.textureStages[i][D3DTSS_BUMPENVMAT11]);
data->Stages[i].BumpEnvLScale = bit::cast<float>(m_state.textureStages[i][D3DTSS_BUMPENVLSCALE]);
data->Stages[i].BumpEnvLOffset = bit::cast<float>(m_state.textureStages[i][D3DTSS_BUMPENVLOFFSET]);
}
}
// Depth bounds are controlled through the ADAPTIVETESS render
// states: X must hold the NVDB format code to enable the test,
// Z and W hold the float bit patterns of the min and max bounds.
if (m_flags.test(D3D9DeviceFlag::DirtyDepthBounds)) {
m_flags.clr(D3D9DeviceFlag::DirtyDepthBounds);
DxvkDepthBounds db;
db.enableDepthBounds = (m_state.renderStates[D3DRS_ADAPTIVETESS_X] == uint32_t(D3D9Format::NVDB));
db.minDepthBounds = bit::cast<float>(m_state.renderStates[D3DRS_ADAPTIVETESS_Z]);
db.maxDepthBounds = bit::cast<float>(m_state.renderStates[D3DRS_ADAPTIVETESS_W]);
EmitCs([
cDepthBounds = db
] (DxvkContext* ctx) {
ctx->setDepthBounds(cDepthBounds);
});
}
}
/**
 * \brief Binds one permutation of a shader module
 *
 * The shader object is looked up immediately and
 * handed to the context for the given stage.
 *
 * \tparam ShaderStage Program type to bind to
 * \param [in] pShaderModule Shader module
 * \param [in] Permutation Permutation to bind
 */
template <DxsoProgramType ShaderStage>
void D3D9DeviceEx::BindShader(
  const D3D9CommonShader*     pShaderModule,
        D3D9ShaderPermutation Permutation) {
  auto shader = pShaderModule->GetShader(Permutation);

  EmitCs([cShader = std::move(shader)] (DxvkContext* ctx) {
    ctx->bindShader(GetShaderStage(ShaderStage), cShader);
  });
}
// Rebuilds the vertex input layout and clears DirtyInputLayout.
//
// Without a vertex declaration, an empty layout is set. Otherwise
// one attribute is generated per element of the input signature of
// the programmable vertex shader (or of the fixed-function signature)
// and matched by semantic against the vertex declaration.
//
// Both lambdas capture m_iaState by reference and write
// streamsUsed / streamsInstanced from inside the lambda.
void D3D9DeviceEx::BindInputLayout() {
m_flags.clr(D3D9DeviceFlag::DirtyInputLayout);
if (m_state.vertexDecl == nullptr) {
EmitCs([&cIaState = m_iaState] (DxvkContext* ctx) {
cIaState.streamsUsed = 0;
ctx->setInputLayout(0, nullptr, 0, nullptr);
});
}
else {
// Copy everything the lambda needs by value: stream frequencies,
// the vertex declaration and, if one is in use, the vertex shader.
std::array<uint32_t, caps::MaxStreams> streamFreq;
for (uint32_t i = 0; i < caps::MaxStreams; i++)
streamFreq[i] = m_state.streamFreq[i];
Com<D3D9VertexDecl, false> vertexDecl = m_state.vertexDecl;
Com<D3D9VertexShader, false> vertexShader;
// Stays null on the fixed-function path, which selects
// the fixed-function input signature below.
if (UseProgrammableVS())
vertexShader = m_state.vertexShader;
EmitCs([
&cIaState = m_iaState,
cVertexDecl = std::move(vertexDecl),
cVertexShader = std::move(vertexShader),
cStreamsInstanced = m_instancedData,
cStreamFreq = streamFreq
] (DxvkContext* ctx) {
cIaState.streamsInstanced = cStreamsInstanced;
cIaState.streamsUsed = 0;
const auto& elements = cVertexDecl->GetElements();
// Sparse lists: attributes are indexed by location, bindings
// by binding index. The masks record which entries are valid.
std::array<DxvkVertexAttribute, 2 * caps::InputRegisterCount> attrList;
std::array<DxvkVertexBinding, 2 * caps::InputRegisterCount> bindList;
uint32_t attrMask = 0;
uint32_t bindMask = 0;
const auto& isgn = cVertexShader != nullptr
? GetCommonShader(cVertexShader)->GetIsgn()
: GetFixedFunctionIsgn();
for (uint32_t i = 0; i < isgn.elemCount; i++) {
const auto& decl = isgn.elems[i];
// Default attribute, used when the vertex declaration
// has no element with a matching semantic.
DxvkVertexAttribute attrib;
attrib.location = i;
attrib.binding = NullStreamIdx;
attrib.format = VK_FORMAT_R32G32B32A32_SFLOAT;
attrib.offset = 0;
// First declaration element with the same semantic wins.
for (const auto& element : elements) {
DxsoSemantic elementSemantic = { static_cast<DxsoUsage>(element.Usage), element.UsageIndex };
// PositionT elements are matched as plain Position.
if (elementSemantic.usage == DxsoUsage::PositionT)
elementSemantic.usage = DxsoUsage::Position;
if (elementSemantic == decl.semantic) {
attrib.binding = uint32_t(element.Stream);
attrib.format = DecodeDecltype(D3DDECLTYPE(element.Type));
attrib.offset = element.Offset;
cIaState.streamsUsed |= 1u << attrib.binding;
break;
}
}
attrList[i] = attrib;
DxvkVertexBinding binding;
binding.binding = attrib.binding;
// NOTE(review): the modulo presumably keeps the NullStreamIdx
// default within the bounds of cStreamFreq - confirm.
uint32_t instanceData = cStreamFreq[binding.binding % caps::MaxStreams];
if (instanceData & D3DSTREAMSOURCE_INSTANCEDATA) {
binding.fetchRate = instanceData & 0x7FFFFF; // Remove instance packed-in flags in the data.
binding.inputRate = VK_VERTEX_INPUT_RATE_INSTANCE;
}
else {
binding.fetchRate = 0;
binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
}
// Check if the binding was already defined.
bool bindingDefined = false;
for (uint32_t j = 0; j < i; j++) {
uint32_t bindingId = attrList.at(j).binding;
if (binding.binding == bindingId) {
bindingDefined = true;
}
}
// Only the first attribute that uses a binding writes its entry.
if (!bindingDefined)
bindList.at(binding.binding) = binding;
attrMask |= 1u << i;
bindMask |= 1u << binding.binding;
}
// Compact the attribute and binding lists to filter
// out attributes and bindings not used by the shader
uint32_t attrCount = CompactSparseList(attrList.data(), attrMask);
uint32_t bindCount = CompactSparseList(bindList.data(), bindMask);
ctx->setInputLayout(
attrCount, attrList.data(),
bindCount, bindList.data());
});
}
}
/**
 * \brief Binds a vertex buffer to a slot
 *
 * A null buffer binds an empty slice with a stride of zero.
 *
 * \param [in] Slot Vertex buffer slot
 * \param [in] pBuffer Vertex buffer, may be null
 * \param [in] Offset Offset passed to GetBufferSlice
 * \param [in] Stride Vertex stride
 */
void D3D9DeviceEx::BindVertexBuffer(
        UINT              Slot,
        D3D9VertexBuffer* pBuffer,
        UINT              Offset,
        UINT              Stride) {
  DxvkBufferSlice slice;
  UINT            stride = 0;

  if (pBuffer != nullptr) {
    slice  = pBuffer->GetCommonBuffer()->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>(Offset);
    stride = Stride;
  }

  EmitCs([
    cSlotId      = Slot,
    cBufferSlice = std::move(slice),
    cStride      = stride
  ] (DxvkContext* ctx) {
    ctx->bindVertexBuffer(cSlotId, cBufferSlice, cStride);
  });
}
void D3D9DeviceEx::BindIndices() {
D3D9CommonBuffer* buffer = GetCommonBuffer(m_state.indices);
D3D9Format format = buffer != nullptr
? buffer->Desc()->Format
: D3D9Format::INDEX32;
const VkIndexType indexType = DecodeIndexType(format);
EmitCs([
cBufferSlice = buffer != nullptr ? buffer->GetBufferSlice<D3D9_COMMON_BUFFER_TYPE_REAL>() : DxvkBufferSlice(),
cIndexType = indexType
](DxvkContext* ctx) {
ctx->bindIndexBuffer(cBufferSlice, cIndexType);
});
}
/**
 * \brief Begins a query
 *
 * Takes the device lock and lets the context begin the
 * query. The lambda holds its own reference to the query.
 *
 * \param [in] pQuery The query to begin
 */
void D3D9DeviceEx::Begin(D3D9Query* pQuery) {
  D3D9DeviceLock lock = LockDevice();

  Com<D3D9Query, false> query(pQuery);

  EmitCs([cQuery = std::move(query)](DxvkContext* ctx) {
    cQuery->Begin(ctx);
  });
}
/**
 * \brief Ends a query
 *
 * Event queries are notified and trigger a flush first: a
 * full flush if the query reports stalling, an implicit one
 * otherwise. The context then ends the query.
 *
 * \param [in] pQuery The query to end
 */
void D3D9DeviceEx::End(D3D9Query* pQuery) {
  D3D9DeviceLock lock = LockDevice();

  if (unlikely(pQuery->IsEvent())) {
    pQuery->NotifyEnd();

    if (pQuery->IsStalling())
      Flush();
    else
      FlushImplicit(TRUE);
  }

  Com<D3D9Query, false> query(pQuery);

  EmitCs([cQuery = std::move(query)](DxvkContext* ctx) {
    cQuery->End(ctx);
  });
}
/**
 * \brief Updates bits of a vertex shader bool constant word
 *
 * Bits selected by the mask are replaced, all other bits are
 * kept. Marks the vertex shader constants as dirty.
 *
 * \param [in] idx Index of the bool constant word
 * \param [in] mask Bits to replace
 * \param [in] bits New bit values
 */
void D3D9DeviceEx::SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) {
  auto& word = m_state.vsConsts.bConsts[idx];
  word = (word & ~mask) | (bits & mask);

  m_consts[DxsoProgramTypes::VertexShader].dirty = true;
}
/**
 * \brief Updates bits of a pixel shader bool constant word
 *
 * Bits selected by the mask are replaced, all other bits are
 * kept. Marks the pixel shader constants as dirty.
 *
 * \param [in] idx Index of the bool constant word
 * \param [in] mask Bits to replace
 * \param [in] bits New bit values
 */
void D3D9DeviceEx::SetPixelBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits) {
  auto& word = m_state.psConsts.bConsts[idx];
  word = (word & ~mask) | (bits & mask);

  m_consts[DxsoProgramTypes::PixelShader].dirty = true;
}
/**
 * \brief Creates a shader module from bytecode
 *
 * Obtains the module from the device's shader module set.
 * A DxvkError thrown along the way is logged and reported
 * as an invalid call.
 *
 * \param [out] pShaderModule Receives the shader module
 * \param [in] ShaderStage Shader stage
 * \param [in] pShaderBytecode Shader bytecode
 * \param [in] pModuleInfo Module info
 * \returns \c D3D_OK on success, \c D3DERR_INVALIDCALL on error
 */
HRESULT D3D9DeviceEx::CreateShaderModule(
        D3D9CommonShader*     pShaderModule,
        VkShaderStageFlagBits ShaderStage,
  const DWORD*                pShaderBytecode,
  const DxsoModuleInfo*       pModuleInfo) {
  try {
    *pShaderModule = m_shaderModules->GetShaderModule(
      this, ShaderStage, pModuleInfo, pShaderBytecode);
  } catch (const DxvkError& e) {
    Logger::err(e.message());
    return D3DERR_INVALIDCALL;
  }

  return D3D_OK;
}
/**
 * \brief Sets a range of shader constants
 *
 * Validates the register range against the software register
 * count, clamps it to the hardware register count, and then
 * either records the call into the active state block or
 * updates the device state.
 *
 * \tparam ProgramType Vertex or pixel shader
 * \tparam ConstantType Float, int or bool constants
 * \tparam T Element type of the constant data
 * \param [in] StartRegister First register to write
 * \param [in] pConstantData Constant data
 * \param [in] Count Number of registers to write
 * \returns \c D3DERR_INVALIDCALL if the range ends past the software
 *          register count, or if data is null while registers remain
 *          to be written. \c D3D_OK otherwise, including the case
 *          where clamping leaves nothing to write.
 */
template <
  DxsoProgramType  ProgramType,
  D3D9ConstantType ConstantType,
  typename         T>
HRESULT D3D9DeviceEx::SetShaderConstants(
        UINT  StartRegister,
  const T*    pConstantData,
        UINT  Count) {
  const     uint32_t regCountHardware = DetermineHardwareRegCount<ProgramType, ConstantType>();
  constexpr uint32_t regCountSoftware = DetermineSoftwareRegCount<ProgramType, ConstantType>();

  // Reject ranges that end past the software register count.
  // This is deliberately written without computing
  // StartRegister + Count: that sum is evaluated in unsigned
  // 32-bit arithmetic and wraps around for large inputs, which
  // would let an out-of-range StartRegister slip through.
  if (unlikely(StartRegister > regCountSoftware
            || Count > regCountSoftware - StartRegister))
    return D3DERR_INVALIDCALL;

  // Registers past the hardware register count are dropped.
  // The sum cannot wrap here thanks to the check above.
  Count = UINT(
    std::max<INT>(
      std::clamp<INT>(Count + StartRegister, 0, regCountHardware) - INT(StartRegister),
      0));

  if (unlikely(Count == 0))
    return D3D_OK;

  if (unlikely(pConstantData == nullptr))
    return D3DERR_INVALIDCALL;

  if (unlikely(ShouldRecord()))
    return m_recorder->SetShaderConstants<ProgramType, ConstantType, T>(
      StartRegister,
      pConstantData,
      Count);

  // Looks up the maxConstIndex* value of the bound shader for
  // this constant type, or 0 if no shader is bound.
  auto DetermineMaxCount = [&](const auto& shader) {
    if (unlikely(shader == nullptr))
      return 0u;

    const auto& meta = GetCommonShader(shader)->GetMeta();

    if constexpr (ConstantType == D3D9ConstantType::Float)
      return meta.maxConstIndexF;
    else if constexpr (ConstantType == D3D9ConstantType::Int)
      return meta.maxConstIndexI;
    else
      return meta.maxConstIndexB;
  };

  uint32_t maxCount = ProgramType == DxsoProgramTypes::VertexShader
    ? DetermineMaxCount(m_state.vertexShader)
    : DetermineMaxCount(m_state.pixelShader);

  // The constants only become dirty if the written
  // range starts below that shader-reported limit.
  m_consts[ProgramType].dirty |= StartRegister < maxCount;

  UpdateStateConstants<ProgramType, ConstantType, T>(
    &m_state,
    StartRegister,
    pConstantData,
    Count,
    m_d3d9Options.d3d9FloatEmulation);

  return D3D_OK;
}
// Updates the fixed-function vertex stage.
//
// Four independent parts, each guarded by its own condition:
//  1. Shader: builds a D3D9FFShaderKeyVS from the vertex declaration
//     flags, render states and texture stage states, and binds the
//     matching module (DirtyFFVertexShader).
//  2. Viewport info: recomputes the inverse viewport transform used
//     with pre-transformed positions (DirtyFFViewport, or a change
//     of the Z test state).
//  3. Constants: uploads matrices, lights and material
//     (DirtyFFVertexData).
//  4. Vertex blend matrices (DirtyFFVertexBlend, normal blend mode only).
void D3D9DeviceEx::UpdateFixedFunctionVS() {
// Shader...
bool hasPositionT = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT) : false;
bool hasBlendWeight = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasBlendWeight) : false;
bool hasBlendIndices = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasBlendIndices) : false;
bool indexedVertexBlend = hasBlendIndices && m_state.renderStates[D3DRS_INDEXEDVERTEXBLENDENABLE];
// Vertex blending is never used with pre-transformed positions.
// Tweening selects Tween mode, every other enabled value Normal mode.
// Any mode except D3DVBF_0WEIGHTS needs blend weights in the vertex
// declaration; D3DVBF_0WEIGHTS needs indexed vertex blending instead.
// If the requirement is not met, blending is disabled.
D3D9FF_VertexBlendMode vertexBlendMode = D3D9FF_VertexBlendMode_Disabled;
if (m_state.renderStates[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE && !hasPositionT) {
vertexBlendMode = m_state.renderStates[D3DRS_VERTEXBLEND] == D3DVBF_TWEENING
? D3D9FF_VertexBlendMode_Tween
: D3D9FF_VertexBlendMode_Normal;
if (m_state.renderStates[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS) {
if (!hasBlendWeight)
vertexBlendMode = D3D9FF_VertexBlendMode_Disabled;
}
else if (!indexedVertexBlend)
vertexBlendMode = D3D9FF_VertexBlendMode_Disabled;
}
// A vertex shader is set, but the declaration has pre-transformed
// positions. Force the fixed-function shader to be bound below and
// mark the programmable shader and input layout dirty as well.
// NOTE(review): presumably this is the case where the fixed-function
// path is taken despite a shader being set - confirm against
// UseProgrammableVS().
if (unlikely(hasPositionT && m_state.vertexShader != nullptr && !m_flags.test(D3D9DeviceFlag::DirtyProgVertexShader))) {
m_flags.set(D3D9DeviceFlag::DirtyInputLayout);
m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);
m_flags.set(D3D9DeviceFlag::DirtyProgVertexShader);
}
if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexShader)) {
m_flags.clr(D3D9DeviceFlag::DirtyFFVertexShader);
D3D9FFShaderKeyVS key;
key.Data.Contents.HasPositionT = hasPositionT;
key.Data.Contents.HasColor0 = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor0) : false;
key.Data.Contents.HasColor1 = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasColor1) : false;
key.Data.Contents.HasPointSize = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPointSize) : false;
key.Data.Contents.HasFog = m_state.vertexDecl != nullptr ? m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasFog) : false;
// Lighting is never applied to pre-transformed vertices.
bool lighting = m_state.renderStates[D3DRS_LIGHTING] != 0 && !key.Data.Contents.HasPositionT;
bool colorVertex = m_state.renderStates[D3DRS_COLORVERTEX] != 0;
// Mask applied to the four material source states below. It is zero
// unless both lighting and D3DRS_COLORVERTEX are on; it includes
// COLOR1 / COLOR2 only if the declaration has that color.
uint32_t mask = (lighting && colorVertex)
? (key.Data.Contents.HasColor0 ? D3DMCS_COLOR1 : D3DMCS_MATERIAL)
| (key.Data.Contents.HasColor1 ? D3DMCS_COLOR2 : D3DMCS_MATERIAL)
: 0;
key.Data.Contents.UseLighting = lighting;
key.Data.Contents.NormalizeNormals = m_state.renderStates[D3DRS_NORMALIZENORMALS];
key.Data.Contents.LocalViewer = m_state.renderStates[D3DRS_LOCALVIEWER] && lighting;
key.Data.Contents.RangeFog = m_state.renderStates[D3DRS_RANGEFOGENABLE];
key.Data.Contents.DiffuseSource = m_state.renderStates[D3DRS_DIFFUSEMATERIALSOURCE] & mask;
key.Data.Contents.AmbientSource = m_state.renderStates[D3DRS_AMBIENTMATERIALSOURCE] & mask;
key.Data.Contents.SpecularSource = m_state.renderStates[D3DRS_SPECULARMATERIALSOURCE] & mask;
key.Data.Contents.EmissiveSource = m_state.renderStates[D3DRS_EMISSIVEMATERIALSOURCE] & mask;
// Count the enabled-light slots that hold a light (UINT32_MAX
// marks an empty slot). Stays zero when lighting is off.
uint32_t lightCount = 0;
if (key.Data.Contents.UseLighting) {
for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) {
if (m_state.enabledLightIndices[i] != UINT32_MAX)
lightCount++;
}
}
key.Data.Contents.LightCount = lightCount;
// Pack three bits each of transform flags, texcoord generation
// flags and texcoord index per texture stage into the key.
for (uint32_t i = 0; i < caps::MaxTextureBlendStages; i++) {
uint32_t transformFlags = m_state.textureStages[i][D3DTSS_TEXTURETRANSFORMFLAGS] & ~(D3DTTFF_PROJECTED);
uint32_t index = m_state.textureStages[i][D3DTSS_TEXCOORDINDEX];
uint32_t indexFlags = (index & TCIMask) >> TCIOffset;
transformFlags &= 0b111;
index &= 0b111;
key.Data.Contents.TransformFlags |= transformFlags << (i * 3);
key.Data.Contents.TexcoordFlags |= indexFlags << (i * 3);
key.Data.Contents.TexcoordIndices |= index << (i * 3);
}
key.Data.Contents.TexcoordDeclMask = m_state.vertexDecl != nullptr ? m_state.vertexDecl->GetTexcoordMask() : 0;
key.Data.Contents.VertexBlendMode = uint32_t(vertexBlendMode);
// Indexed flag and blend count are only written in normal blend mode.
if (vertexBlendMode == D3D9FF_VertexBlendMode_Normal) {
key.Data.Contents.VertexBlendIndexed = indexedVertexBlend;
key.Data.Contents.VertexBlendCount = m_state.renderStates[D3DRS_VERTEXBLEND] & 0xff;
}
// The shader module lookup happens inside the lambda;
// m_ffModules is captured by reference.
EmitCs([
this,
cKey = key,
&cShaders = m_ffModules
](DxvkContext* ctx) {
auto shader = cShaders.GetShaderModule(this, cKey);
ctx->bindShader(VK_SHADER_STAGE_VERTEX_BIT, shader.GetShader());
});
}
// Only needed for pre-transformed positions. Setting
// DirtyFFVertexData makes the constants block below re-upload.
if (hasPositionT && (m_flags.test(D3D9DeviceFlag::DirtyFFViewport) || m_ffZTest != IsZTestEnabled())) {
m_flags.clr(D3D9DeviceFlag::DirtyFFViewport);
m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);
const auto& vp = m_state.viewport;
// For us to account for the Vulkan viewport rules
// when translating Window Coords -> Real Coords:
// We need to negate the inverse extent we multiply by,
// this follows through to the offset when that gets
// multiplied by it.
// The 1.0f additional offset however does not,
// so we account for that there manually.
m_ffZTest = IsZTestEnabled();
// With the Z test disabled, both depth bounds are zero,
// which makes the inverse Z extent below zero as well.
float zMin = m_ffZTest ? vp.MinZ : 0.0f;
float zMax = m_ffZTest ? vp.MaxZ : 0.0f;
float zExtent = zMax - zMin;
// Avoid dividing by zero for an empty depth range.
zExtent = zExtent != 0.0f
? 1.0f / zExtent
: 0.0f;
m_viewportInfo.inverseExtent = Vector4(
2.0f / float(vp.Width),
-2.0f / float(vp.Height),
zExtent,
1.0f);
m_viewportInfo.inverseOffset = Vector4(
-float(vp.X), -float(vp.Y),
-zMin, 0.0f);
m_viewportInfo.inverseOffset = m_viewportInfo.inverseOffset * m_viewportInfo.inverseExtent;
m_viewportInfo.inverseOffset = m_viewportInfo.inverseOffset + Vector4(-1.0f, 1.0f, 0.0f, 0.0f);
}
// Constants...
if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexData)) {
m_flags.clr(D3D9DeviceFlag::DirtyFFVertexData);
// A fresh slice is allocated, handed to the context via
// invalidateBuffer, and then filled through its mapped pointer.
DxvkBufferSliceHandle slice = m_vsFixedFunction->allocSlice();
EmitCs([
cBuffer = m_vsFixedFunction,
cSlice = slice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cBuffer, cSlice);
});
auto WorldView = m_state.transforms[GetTransformIndex(D3DTS_VIEW)] * m_state.transforms[GetTransformIndex(D3DTS_WORLD)];
auto NormalMatrix = inverse(WorldView);
D3D9FixedFunctionVS* data = reinterpret_cast<D3D9FixedFunctionVS*>(slice.mapPtr);
data->WorldView = WorldView;
data->NormalMatrix = NormalMatrix;
data->Projection = m_state.transforms[GetTransformIndex(D3DTS_PROJECTION)];
for (uint32_t i = 0; i < data->TexcoordMatrices.size(); i++)
data->TexcoordMatrices[i] = m_state.transforms[GetTransformIndex(D3DTS_TEXTURE0) + i];
data->ViewportInfo = m_viewportInfo;
DecodeD3DCOLOR(m_state.renderStates[D3DRS_AMBIENT], data->GlobalAmbient.data);
// Enabled lights are written contiguously; empty slots are skipped.
// Each light is constructed from the light and the view matrix.
uint32_t lightIdx = 0;
for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) {
auto idx = m_state.enabledLightIndices[i];
if (idx == UINT32_MAX)
continue;
data->Lights[lightIdx++] = D3D9Light(m_state.lights[idx].value(), m_state.transforms[GetTransformIndex(D3DTS_VIEW)]);
}
data->Material = m_state.material;
data->TweenFactor = bit::cast<float>(m_state.renderStates[D3DRS_TWEENFACTOR]);
}
// The flag is only cleared when normal blend mode is active,
// so it stays set for a later draw otherwise.
if (m_flags.test(D3D9DeviceFlag::DirtyFFVertexBlend) && vertexBlendMode == D3D9FF_VertexBlendMode_Normal) {
m_flags.clr(D3D9DeviceFlag::DirtyFFVertexBlend);
DxvkBufferSliceHandle slice = m_vsVertexBlend->allocSlice();
EmitCs([
cBuffer = m_vsVertexBlend,
cSlice = slice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cBuffer, cSlice);
});
// Fills every WorldView entry of the given struct
// with View * WorldMatrix(i).
auto UploadVertexBlendData = [&](auto data) {
for (uint32_t i = 0; i < countof(data->WorldView); i++)
data->WorldView[i] = m_state.transforms[GetTransformIndex(D3DTS_VIEW)] * m_state.transforms[GetTransformIndex(D3DTS_WORLDMATRIX(i))];
};
// The SW layout is only used with software vertex
// processing and indexed vertex blending; otherwise HW.
(m_isSWVP && indexedVertexBlend)
? UploadVertexBlendData(reinterpret_cast<D3D9FixedFunctionVertexBlendDataSW*>(slice.mapPtr))
: UploadVertexBlendData(reinterpret_cast<D3D9FixedFunctionVertexBlendDataHW*>(slice.mapPtr));
}
}
void D3D9DeviceEx::UpdateFixedFunctionPS() {
// Shader...
if (m_flags.test(D3D9DeviceFlag::DirtyFFPixelShader)) {
m_flags.clr(D3D9DeviceFlag::DirtyFFPixelShader);
// Used args for a given operation.
auto ArgsMask = [](DWORD Op) {
switch (Op) {
case D3DTOP_DISABLE:
return 0b0u; // No Args
case D3DTOP_SELECTARG1:
case D3DTOP_PREMODULATE:
return 0b10u; // Arg 1
case D3DTOP_SELECTARG2:
return 0b100u; // Arg 2
case D3DTOP_MULTIPLYADD:
case D3DTOP_LERP:
return 0b111u; // Arg 0, 1, 2
default:
return 0b110u; // Arg 1, 2
}
};
D3D9FFShaderKeyFS key;
uint32_t idx;
for (idx = 0; idx < caps::TextureStageCount; idx++) {
auto& stage = key.Stages[idx].Contents;
auto& data = m_state.textureStages[idx];
// Subsequent stages do not occur if this is true.
if (data[D3DTSS_COLOROP] == D3DTOP_DISABLE)
break;
// If the stage is invalid (ie. no texture bound),
// this and all subsequent stages get disabled.
if (m_state.textures[idx] == nullptr) {
if (((data[D3DTSS_COLORARG0] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[D3DTSS_COLOROP]) & (1 << 0u)))
|| ((data[D3DTSS_COLORARG1] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[D3DTSS_COLOROP]) & (1 << 1u)))
|| ((data[D3DTSS_COLORARG2] & D3DTA_SELECTMASK) == D3DTA_TEXTURE && (ArgsMask(data[D3DTSS_COLOROP]) & (1 << 2u))))
break;
}
stage.ColorOp = data[D3DTSS_COLOROP];
stage.AlphaOp = data[D3DTSS_ALPHAOP];
stage.ColorArg0 = data[D3DTSS_COLORARG0];
stage.ColorArg1 = data[D3DTSS_COLORARG1];
stage.ColorArg2 = data[D3DTSS_COLORARG2];
stage.AlphaArg0 = data[D3DTSS_ALPHAARG0];
stage.AlphaArg1 = data[D3DTSS_ALPHAARG1];
stage.AlphaArg2 = data[D3DTSS_ALPHAARG2];
const uint32_t samplerOffset = idx * 2;
stage.Type = (m_samplerTypeBitfield >> samplerOffset) & 0xffu;
stage.ResultIsTemp = data[D3DTSS_RESULTARG] == D3DTA_TEMP;
uint32_t ttff = data[D3DTSS_TEXTURETRANSFORMFLAGS];
uint32_t count = ttff & ~D3DTTFF_PROJECTED;
stage.Projected = (ttff & D3DTTFF_PROJECTED) ? 1 : 0;
stage.ProjectedCount = (ttff & D3DTTFF_PROJECTED) ? count : 0;
}
auto& stage0 = key.Stages[0].Contents;
if (stage0.ResultIsTemp &&
stage0.ColorOp != D3DTOP_DISABLE &&
stage0.AlphaOp == D3DTOP_DISABLE) {
stage0.AlphaOp = D3DTOP_SELECTARG1;
stage0.AlphaArg1 = D3DTA_DIFFUSE;
}
stage0.GlobalSpecularEnable = m_state.renderStates[D3DRS_SPECULARENABLE];
stage0.GlobalFlatShade = m_state.renderStates[D3DRS_SHADEMODE] == D3DSHADE_FLAT;
// The last stage *always* writes to current.
if (idx >= 1)
key.Stages[idx - 1].Contents.ResultIsTemp = false;
EmitCs([
this,
cKey = key,
&cShaders = m_ffModules
](DxvkContext* ctx) {
auto shader = cShaders.GetShaderModule(this, cKey);
ctx->bindShader(VK_SHADER_STAGE_FRAGMENT_BIT, shader.GetShader());
});
}
// Constants
if (m_flags.test(D3D9DeviceFlag::DirtyFFPixelData)) {
m_flags.clr(D3D9DeviceFlag::DirtyFFPixelData);
DxvkBufferSliceHandle slice = m_psFixedFunction->allocSlice();
EmitCs([
cBuffer = m_psFixedFunction,
cSlice = slice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cBuffer, cSlice);
});
auto& rs = m_state.renderStates;
D3D9FixedFunctionPS* data = reinterpret_cast<D3D9FixedFunctionPS*>(slice.mapPtr);
DecodeD3DCOLOR((D3DCOLOR)rs[D3DRS_TEXTUREFACTOR], data->textureFactor.data);
}
}
// Reports whether the programmable vertex shader path applies: a vertex
// shader and a vertex declaration must both be set, and the declaration
// must not carry the HasPositionT flag.
bool D3D9DeviceEx::UseProgrammableVS() {
  const bool hasShader = m_state.vertexShader != nullptr;
  const bool hasDecl   = m_state.vertexDecl   != nullptr;

  if (!hasShader || !hasDecl)
    return false;

  return !m_state.vertexDecl->TestFlag(D3D9VertexDeclFlag::HasPositionT);
}
// Reports whether a pixel shader is currently set on the device state.
bool D3D9DeviceEx::UseProgrammablePS() {
  const bool hasPixelShader = m_state.pixelShader != nullptr;
  return hasPixelShader;
}
// Queues an update of the SamplerType specialization constant on the
// graphics bind point and records the value as the last one submitted.
void D3D9DeviceEx::UpdateSamplerSpecConsant(uint32_t value) {
  EmitCs([
    cSamplerTypes = value
  ] (DxvkContext* ctx) {
    ctx->setSpecConstant(
      VK_PIPELINE_BIND_POINT_GRAPHICS,
      D3D9SpecConstantId::SamplerType,
      cSamplerTypes);
  });

  m_lastSamplerTypeBitfield = value;
}
// Queues an update of the ProjectionType specialization constant on the
// graphics bind point and records the value as the last one submitted.
void D3D9DeviceEx::UpdateProjectionSpecConstant(uint32_t value) {
  EmitCs([
    cProjectionTypes = value
  ] (DxvkContext* ctx) {
    ctx->setSpecConstant(
      VK_PIPELINE_BIND_POINT_GRAPHICS,
      D3D9SpecConstantId::ProjectionType,
      cProjectionTypes);
  });

  m_lastProjectionBitfield = value;
}
// Applies the input assembly state for PrimType to the given context,
// but only when it differs from the primitive type cached in m_iaState.
void D3D9DeviceEx::ApplyPrimitiveType(
        DxvkContext*      pContext,
        D3DPRIMITIVETYPE  PrimType) {
  // Nothing to do if the cached primitive type already matches.
  if (m_iaState.primitiveType == PrimType)
    return;

  m_iaState.primitiveType = PrimType;

  auto inputAssembly = DecodeInputAssemblyState(PrimType);
  pContext->setInputAssemblyState(inputAssembly);
}
void D3D9DeviceEx::ResolveZ() {
D3D9Surface* src = m_state.depthStencil.ptr();
IDirect3DBaseTexture9* dst = m_state.textures[0];
if (unlikely(!src || !dst))
return;
D3D9CommonTexture* srcTextureInfo = GetCommonTexture(src);
D3D9CommonTexture* dstTextureInfo = GetCommonTexture(dst);
const D3D9_COMMON_TEXTURE_DESC* srcDesc = srcTextureInfo->Desc();
const D3D9_COMMON_TEXTURE_DESC* dstDesc = dstTextureInfo->Desc();
VkSampleCountFlagBits dstSampleCount;
DecodeMultiSampleType(dstDesc->MultiSample, dstDesc->MultisampleQuality, &dstSampleCount);
if (unlikely(dstSampleCount != VK_SAMPLE_COUNT_1_BIT)) {
Logger::warn("D3D9DeviceEx::ResolveZ: dstSampleCount != 1. Discarding.");
return;
}
const D3D9_VK_FORMAT_MAPPING srcFormatInfo = LookupFormat(srcDesc->Format);
const D3D9_VK_FORMAT_MAPPING dstFormatInfo = LookupFormat(dstDesc->Format);
auto srcVulkanFormatInfo = imageFormatInfo(srcFormatInfo.FormatColor);
auto dstVulkanFormatInfo = imageFormatInfo(dstFormatInfo.FormatColor);
const VkImageSubresource dstSubresource =
dstTextureInfo->GetSubresourceFromIndex(
dstVulkanFormatInfo->aspectMask, 0);
const VkImageSubresource srcSubresource =
srcTextureInfo->GetSubresourceFromIndex(
srcVulkanFormatInfo->aspectMask, src->GetSubresource());
const VkImageSubresourceLayers dstSubresourceLayers = {
dstSubresource.aspectMask,
dstSubresource.mipLevel,
dstSubresource.arrayLayer, 1 };
const VkImageSubresourceLayers srcSubresourceLayers = {
srcSubresource.aspectMask,
srcSubresource.mipLevel,
srcSubresource.arrayLayer, 1 };
VkSampleCountFlagBits srcSampleCount;
DecodeMultiSampleType(srcDesc->MultiSample, srcDesc->MultisampleQuality, &srcSampleCount);
if (srcSampleCount == VK_SAMPLE_COUNT_1_BIT) {
EmitCs([
cDstImage = dstTextureInfo->GetImage(),
cSrcImage = srcTextureInfo->GetImage(),
cDstLayers = dstSubresourceLayers,
cSrcLayers = srcSubresourceLayers
] (DxvkContext* ctx) {
ctx->copyImage(
cDstImage, cDstLayers, VkOffset3D { 0, 0, 0 },
cSrcImage, cSrcLayers, VkOffset3D { 0, 0, 0 },
cDstImage->mipLevelExtent(cDstLayers.mipLevel));
});
} else {
EmitCs([
cDstImage = dstTextureInfo->GetImage(),
cSrcImage = srcTextureInfo->GetImage(),
cDstSubres = dstSubresourceLayers,
cSrcSubres = srcSubresourceLayers
] (DxvkContext* ctx) {
// We should resolve using the first sample according to
// http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Advanced-DX9-Capabilities-for-ATI-Radeon-Cards_v2.pdf
// "The resolve operation copies the depth value from the *first sample only* into the resolved depth stencil texture."
constexpr auto resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR;
VkImageResolve region;
region.srcSubresource = cSrcSubres;
region.srcOffset = VkOffset3D { 0, 0, 0 };
region.dstSubresource = cDstSubres;
region.dstOffset = VkOffset3D { 0, 0, 0 };
region.extent = cDstImage->mipLevelExtent(cDstSubres.mipLevel);
ctx->resolveDepthStencilImage(cDstImage, cSrcImage, region, resolveMode, resolveMode);
});
}
dstTextureInfo->MarkAllDirty();
}
// Queues a layout change of the resource's image to NewLayout
// on the CS thread.
void D3D9DeviceEx::TransitionImage(D3D9CommonTexture* pResource, VkImageLayout NewLayout) {
  EmitCs([
    cTarget = pResource->GetImage(),
    cLayout = NewLayout
  ] (DxvkContext* ctx) {
    ctx->changeImageLayout(cTarget, cLayout);
  });
}
// Queues a layout transform from OldLayout to NewLayout for the given
// subresource range of the resource's image. The range is copied into
// the command, so pSubresources only needs to be valid during this call.
void D3D9DeviceEx::TransformImage(
        D3D9CommonTexture*       pResource,
  const VkImageSubresourceRange* pSubresources,
        VkImageLayout            OldLayout,
        VkImageLayout            NewLayout) {
  EmitCs([
    cTarget = pResource->GetImage(),
    cRange  = *pSubresources,
    cFrom   = OldLayout,
    cTo     = NewLayout
  ] (DxvkContext* ctx) {
    ctx->transformImage(cTarget, cRange, cFrom, cTo);
  });
}
// Resets the device state tracked in m_state (render states, texture stage
// states, sampler states, bound textures, stream frequencies, clip planes)
// to fixed initial values and re-binds / flags as dirty the backend state
// derived from them.
//
// pPresentationParameters: only EnableAutoDepthStencil is read here. It
//   decides whether the depth-stencil surface is unbound and what
//   D3DRS_ZENABLE starts out as.
//
// Always returns D3D_OK.
//
// NOTE(review): the values written below presumably mirror the documented
// Direct3D 9 defaults — confirm against the D3D9 reference when changing them.
HRESULT D3D9DeviceEx::ResetState(D3DPRESENT_PARAMETERS* pPresentationParameters) {
  if (!pPresentationParameters->EnableAutoDepthStencil)
    SetDepthStencilSurface(nullptr);

  // Unbind every render target except slot 0. The loop index has to be
  // passed on here: always passing 0 would leave slots 1..N bound.
  for (uint32_t i = 1; i < caps::MaxSimultaneousRenderTargets; i++)
    SetRenderTarget(i, nullptr);

  auto& rs = m_state.renderStates;

  // Blend state
  rs[D3DRS_SEPARATEALPHABLENDENABLE] = FALSE;
  rs[D3DRS_ALPHABLENDENABLE]         = FALSE;
  rs[D3DRS_BLENDOP]                  = D3DBLENDOP_ADD;
  rs[D3DRS_BLENDOPALPHA]             = D3DBLENDOP_ADD;
  rs[D3DRS_DESTBLEND]                = D3DBLEND_ZERO;
  rs[D3DRS_DESTBLENDALPHA]           = D3DBLEND_ZERO;
  rs[D3DRS_COLORWRITEENABLE]         = 0x0000000f;
  rs[D3DRS_COLORWRITEENABLE1]        = 0x0000000f;
  rs[D3DRS_COLORWRITEENABLE2]        = 0x0000000f;
  rs[D3DRS_COLORWRITEENABLE3]        = 0x0000000f;
  rs[D3DRS_SRCBLEND]                 = D3DBLEND_ONE;
  rs[D3DRS_SRCBLENDALPHA]            = D3DBLEND_ONE;
  BindBlendState();

  rs[D3DRS_BLENDFACTOR]              = 0xffffffff;
  BindBlendFactor();

  // Depth / stencil state. Depth testing starts enabled only when an
  // automatic depth-stencil surface was requested.
  rs[D3DRS_ZENABLE]                  = pPresentationParameters->EnableAutoDepthStencil
                                     ? D3DZB_TRUE
                                     : D3DZB_FALSE;
  rs[D3DRS_ZFUNC]                    = D3DCMP_LESSEQUAL;
  rs[D3DRS_TWOSIDEDSTENCILMODE]      = FALSE;
  rs[D3DRS_ZWRITEENABLE]             = TRUE;
  rs[D3DRS_STENCILENABLE]            = FALSE;
  rs[D3DRS_STENCILFAIL]              = D3DSTENCILOP_KEEP;
  rs[D3DRS_STENCILZFAIL]             = D3DSTENCILOP_KEEP;
  rs[D3DRS_STENCILPASS]              = D3DSTENCILOP_KEEP;
  rs[D3DRS_STENCILFUNC]              = D3DCMP_ALWAYS;
  rs[D3DRS_CCW_STENCILFAIL]          = D3DSTENCILOP_KEEP;
  rs[D3DRS_CCW_STENCILZFAIL]         = D3DSTENCILOP_KEEP;
  rs[D3DRS_CCW_STENCILPASS]          = D3DSTENCILOP_KEEP;
  rs[D3DRS_CCW_STENCILFUNC]          = D3DCMP_ALWAYS;
  rs[D3DRS_STENCILMASK]              = 0xFFFFFFFF;
  rs[D3DRS_STENCILWRITEMASK]         = 0xFFFFFFFF;
  BindDepthStencilState();

  rs[D3DRS_STENCILREF]               = 0;
  BindDepthStencilRefrence();

  // Rasterizer state
  rs[D3DRS_FILLMODE]                 = D3DFILL_SOLID;
  rs[D3DRS_CULLMODE]                 = D3DCULL_CCW;
  rs[D3DRS_DEPTHBIAS]                = bit::cast<DWORD>(0.0f);
  rs[D3DRS_SLOPESCALEDEPTHBIAS]      = bit::cast<DWORD>(0.0f);
  BindRasterizerState();
  BindDepthBias();

  rs[D3DRS_SCISSORTESTENABLE]        = FALSE;

  // Alpha test state
  rs[D3DRS_ALPHATESTENABLE]          = FALSE;
  rs[D3DRS_ALPHAFUNC]                = D3DCMP_ALWAYS;
  BindAlphaTestState();

  rs[D3DRS_ALPHAREF]                 = 0;
  UpdatePushConstant<D3D9RenderStateItem::AlphaRef>();

  // Multisample state
  rs[D3DRS_MULTISAMPLEMASK]          = 0xffffffff;
  BindMultiSampleState();

  // Fixed-function pixel constants
  rs[D3DRS_TEXTUREFACTOR]            = 0xffffffff;
  m_flags.set(D3D9DeviceFlag::DirtyFFPixelData);

  // Fixed-function vertex shader state
  rs[D3DRS_DIFFUSEMATERIALSOURCE]    = D3DMCS_COLOR1;
  rs[D3DRS_SPECULARMATERIALSOURCE]   = D3DMCS_COLOR2;
  rs[D3DRS_AMBIENTMATERIALSOURCE]    = D3DMCS_MATERIAL;
  rs[D3DRS_EMISSIVEMATERIALSOURCE]   = D3DMCS_MATERIAL;
  rs[D3DRS_LIGHTING]                 = TRUE;
  rs[D3DRS_COLORVERTEX]              = TRUE;
  rs[D3DRS_LOCALVIEWER]              = TRUE;
  rs[D3DRS_RANGEFOGENABLE]           = FALSE;
  rs[D3DRS_NORMALIZENORMALS]         = FALSE;
  m_flags.set(D3D9DeviceFlag::DirtyFFVertexShader);

  // PS
  rs[D3DRS_SPECULARENABLE]           = FALSE;

  // Fixed-function vertex constants
  rs[D3DRS_AMBIENT]                  = 0;
  m_flags.set(D3D9DeviceFlag::DirtyFFVertexData);

  // Fog state
  rs[D3DRS_FOGENABLE]                = FALSE;
  rs[D3DRS_FOGCOLOR]                 = 0;
  rs[D3DRS_FOGTABLEMODE]             = D3DFOG_NONE;
  rs[D3DRS_FOGSTART]                 = bit::cast<DWORD>(0.0f);
  rs[D3DRS_FOGEND]                   = bit::cast<DWORD>(1.0f);
  rs[D3DRS_FOGDENSITY]               = bit::cast<DWORD>(1.0f);
  rs[D3DRS_FOGVERTEXMODE]            = D3DFOG_NONE;
  m_flags.set(D3D9DeviceFlag::DirtyFogColor);
  m_flags.set(D3D9DeviceFlag::DirtyFogDensity);
  m_flags.set(D3D9DeviceFlag::DirtyFogEnd);
  m_flags.set(D3D9DeviceFlag::DirtyFogScale);
  m_flags.set(D3D9DeviceFlag::DirtyFogState);

  // Clip planes
  rs[D3DRS_CLIPPLANEENABLE]          = 0;
  m_flags.set(D3D9DeviceFlag::DirtyClipPlanes);

  // Point state
  rs[D3DRS_POINTSPRITEENABLE]        = FALSE;
  rs[D3DRS_POINTSCALEENABLE]         = FALSE;
  rs[D3DRS_POINTSCALE_A]             = bit::cast<DWORD>(1.0f);
  rs[D3DRS_POINTSCALE_B]             = bit::cast<DWORD>(0.0f);
  rs[D3DRS_POINTSCALE_C]             = bit::cast<DWORD>(0.0f);
  rs[D3DRS_POINTSIZE]                = bit::cast<DWORD>(1.0f);
  rs[D3DRS_POINTSIZE_MIN]            = bit::cast<DWORD>(1.0f);
  rs[D3DRS_POINTSIZE_MAX]            = bit::cast<DWORD>(64.0f);
  UpdatePushConstant<D3D9RenderStateItem::PointSize>();
  UpdatePushConstant<D3D9RenderStateItem::PointSizeMin>();
  UpdatePushConstant<D3D9RenderStateItem::PointSizeMax>();
  m_flags.set(D3D9DeviceFlag::DirtyPointScale);
  UpdatePointMode<false>();

  rs[D3DRS_SRGBWRITEENABLE]          = 0;

  rs[D3DRS_SHADEMODE]                = D3DSHADE_GOURAUD;

  // Vertex blending
  rs[D3DRS_VERTEXBLEND]              = D3DVBF_DISABLE;
  rs[D3DRS_INDEXEDVERTEXBLENDENABLE] = FALSE;
  rs[D3DRS_TWEENFACTOR]              = bit::cast<DWORD>(0.0f);
  m_flags.set(D3D9DeviceFlag::DirtyFFVertexBlend);

  // Render States not implemented beyond this point.
  rs[D3DRS_LASTPIXEL]                = TRUE;
  rs[D3DRS_DITHERENABLE]             = FALSE;
  rs[D3DRS_WRAP0]                    = 0;
  rs[D3DRS_WRAP1]                    = 0;
  rs[D3DRS_WRAP2]                    = 0;
  rs[D3DRS_WRAP3]                    = 0;
  rs[D3DRS_WRAP4]                    = 0;
  rs[D3DRS_WRAP5]                    = 0;
  rs[D3DRS_WRAP6]                    = 0;
  rs[D3DRS_WRAP7]                    = 0;
  rs[D3DRS_CLIPPING]                 = TRUE;
  rs[D3DRS_MULTISAMPLEANTIALIAS]     = TRUE;
  rs[D3DRS_PATCHEDGESTYLE]           = D3DPATCHEDGE_DISCRETE;
  rs[D3DRS_DEBUGMONITORTOKEN]        = D3DDMT_ENABLE;
  rs[D3DRS_POSITIONDEGREE]           = D3DDEGREE_CUBIC;
  rs[D3DRS_NORMALDEGREE]             = D3DDEGREE_LINEAR;
  rs[D3DRS_ANTIALIASEDLINEENABLE]    = FALSE;
  rs[D3DRS_MINTESSELLATIONLEVEL]     = bit::cast<DWORD>(1.0f);
  rs[D3DRS_MAXTESSELLATIONLEVEL]     = bit::cast<DWORD>(1.0f);
  rs[D3DRS_ADAPTIVETESS_X]           = bit::cast<DWORD>(0.0f);
  rs[D3DRS_ADAPTIVETESS_Y]           = bit::cast<DWORD>(0.0f);
  rs[D3DRS_ADAPTIVETESS_Z]           = bit::cast<DWORD>(1.0f);
  rs[D3DRS_ADAPTIVETESS_W]           = bit::cast<DWORD>(0.0f);
  rs[D3DRS_ENABLEADAPTIVETESSELLATION] = FALSE;
  rs[D3DRS_WRAP8]                    = 0;
  rs[D3DRS_WRAP9]                    = 0;
  rs[D3DRS_WRAP10]                   = 0;
  rs[D3DRS_WRAP11]                   = 0;
  rs[D3DRS_WRAP12]                   = 0;
  rs[D3DRS_WRAP13]                   = 0;
  rs[D3DRS_WRAP14]                   = 0;
  rs[D3DRS_WRAP15]                   = 0;
  // End Unimplemented Render States

  // Texture stage states: only stage 0 starts with active color/alpha ops.
  for (uint32_t i = 0; i < caps::TextureStageCount; i++) {
    auto& stage = m_state.textureStages[i];

    stage[D3DTSS_COLOROP]               = i == 0 ? D3DTOP_MODULATE : D3DTOP_DISABLE;
    stage[D3DTSS_COLORARG1]             = D3DTA_TEXTURE;
    stage[D3DTSS_COLORARG2]             = D3DTA_CURRENT;
    stage[D3DTSS_ALPHAOP]               = i == 0 ? D3DTOP_SELECTARG1 : D3DTOP_DISABLE;
    stage[D3DTSS_ALPHAARG1]             = D3DTA_TEXTURE;
    stage[D3DTSS_ALPHAARG2]             = D3DTA_CURRENT;
    stage[D3DTSS_BUMPENVMAT00]          = bit::cast<DWORD>(0.0f);
    stage[D3DTSS_BUMPENVMAT01]          = bit::cast<DWORD>(0.0f);
    stage[D3DTSS_BUMPENVMAT10]          = bit::cast<DWORD>(0.0f);
    stage[D3DTSS_BUMPENVMAT11]          = bit::cast<DWORD>(0.0f);
    stage[D3DTSS_TEXCOORDINDEX]         = i;
    stage[D3DTSS_BUMPENVLSCALE]         = bit::cast<DWORD>(0.0f);
    stage[D3DTSS_BUMPENVLOFFSET]        = bit::cast<DWORD>(0.0f);
    stage[D3DTSS_TEXTURETRANSFORMFLAGS] = D3DTTFF_DISABLE;
    stage[D3DTSS_COLORARG0]             = D3DTA_CURRENT;
    stage[D3DTSS_ALPHAARG0]             = D3DTA_CURRENT;
    stage[D3DTSS_RESULTARG]             = D3DTA_CURRENT;
    stage[D3DTSS_CONSTANT]              = 0x00000000;
  }
  m_flags.set(D3D9DeviceFlag::DirtySharedPixelShaderData);
  m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);

  for (uint32_t i = 0; i < caps::MaxStreams; i++)
    m_state.streamFreq[i] = 1;

  // Drop every texture binding, both in the tracked state
  // and in the backend resource slots.
  for (uint32_t i = 0; i < m_state.textures.size(); i++) {
    TextureChangePrivate(m_state.textures[i], nullptr);

    DWORD sampler = i;
    auto samplerInfo = RemapStateSamplerShader(sampler);
    uint32_t slot = computeResourceSlotId(samplerInfo.first, DxsoBindingType::ColorImage, uint32_t(samplerInfo.second));

    EmitCs([
      cSlot = slot
    ](DxvkContext* ctx) {
      ctx->bindResourceView(cSlot, nullptr, nullptr);
    });
  }

  // Sampler states
  auto& ss = m_state.samplerStates;
  for (uint32_t i = 0; i < ss.size(); i++) {
    auto& state = ss[i];
    state[D3DSAMP_ADDRESSU]      = D3DTADDRESS_WRAP;
    state[D3DSAMP_ADDRESSV]      = D3DTADDRESS_WRAP;
    state[D3DSAMP_ADDRESSW]      = D3DTADDRESS_WRAP;
    state[D3DSAMP_BORDERCOLOR]   = 0x00000000;
    state[D3DSAMP_MAGFILTER]     = D3DTEXF_POINT;
    state[D3DSAMP_MINFILTER]     = D3DTEXF_POINT;
    state[D3DSAMP_MIPFILTER]     = D3DTEXF_NONE;
    state[D3DSAMP_MIPMAPLODBIAS] = bit::cast<DWORD>(0.0f);
    state[D3DSAMP_MAXMIPLEVEL]   = 0;
    state[D3DSAMP_MAXANISOTROPY] = 1;
    state[D3DSAMP_SRGBTEXTURE]   = 0;
    state[D3DSAMP_ELEMENTINDEX]  = 0;
    state[D3DSAMP_DMAPOFFSET]    = 0;

    BindSampler(i);
  }

  // Every sampler was just bound above, so none remain dirty.
  m_dirtySamplerStates = 0;

  for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) {
    float plane[4] = { 0, 0, 0, 0 };
    SetClipPlane(i, plane);
  }

  // We should do this...
  m_flags.set(D3D9DeviceFlag::DirtyInputLayout);

  UpdateSamplerSpecConsant(0u);

  return D3D_OK;
}
// Resets the implicit swapchain for the given presentation parameters,
// or creates it if none exists yet. If requested, an automatic
// depth-stencil surface is also created and bound. Finally back buffer 0
// is bound as render target 0.
//
// Returns D3DERR_INVALIDCALL for an unsupported, non-Unknown back buffer
// format, D3DERR_NOTAVAILABLE if the depth-stencil description cannot be
// normalized, and D3D_OK otherwise.
HRESULT D3D9DeviceEx::ResetSwapChain(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) {
  D3D9Format backBufferFmt = EnumerateFormat(pPresentationParameters->BackBufferFormat);

  const char* autoDepthStencilStr = pPresentationParameters->EnableAutoDepthStencil ? "true" : "false";
  const char* windowedStr         = pPresentationParameters->Windowed               ? "true" : "false";

  Logger::info(str::format(
    "D3D9DeviceEx::ResetSwapChain:\n",
    "  Requested Presentation Parameters\n",
    "    - Width:              ", pPresentationParameters->BackBufferWidth, "\n",
    "    - Height:             ", pPresentationParameters->BackBufferHeight, "\n",
    "    - Format:             ", backBufferFmt, "\n"
    "    - Auto Depth Stencil: ", autoDepthStencilStr, "\n",
    "                ^ Format: ", EnumerateFormat(pPresentationParameters->AutoDepthStencilFormat), "\n",
    "    - Windowed:           ", windowedStr, "\n"));

  // An Unknown format skips validation; anything else must be supported.
  if (backBufferFmt != D3D9Format::Unknown
   && !IsSupportedBackBufferFormat(backBufferFmt, pPresentationParameters->Windowed)) {
    Logger::err(str::format("D3D9DeviceEx::ResetSwapChain: Unsupported backbuffer format: ",
      EnumerateFormat(pPresentationParameters->BackBufferFormat)));
    return D3DERR_INVALIDCALL;
  }

  // Reuse the implicit swapchain if there is one, otherwise create it.
  auto* existingSwapchain = GetInternalSwapchain(0);

  if (existingSwapchain != nullptr)
    existingSwapchain->Reset(pPresentationParameters, pFullscreenDisplayMode);
  else
    m_swapchains.emplace_back(new D3D9SwapChainEx(this, pPresentationParameters, pFullscreenDisplayMode));

  if (pPresentationParameters->EnableAutoDepthStencil) {
    // The depth-stencil surface matches the back buffer's size and multisampling.
    D3D9_COMMON_TEXTURE_DESC depthDesc;
    depthDesc.Width              = pPresentationParameters->BackBufferWidth;
    depthDesc.Height             = pPresentationParameters->BackBufferHeight;
    depthDesc.Depth              = 1;
    depthDesc.ArraySize          = 1;
    depthDesc.MipLevels          = 1;
    depthDesc.Format             = EnumerateFormat(pPresentationParameters->AutoDepthStencilFormat);
    depthDesc.Usage              = D3DUSAGE_DEPTHSTENCIL;
    depthDesc.Pool               = D3DPOOL_DEFAULT;
    depthDesc.Discard            = FALSE;
    depthDesc.MultiSample        = pPresentationParameters->MultiSampleType;
    depthDesc.MultisampleQuality = pPresentationParameters->MultiSampleQuality;

    const HRESULT normalizeResult = D3D9CommonTexture::NormalizeTextureProperties(this, &depthDesc);

    if (FAILED(normalizeResult))
      return D3DERR_NOTAVAILABLE;

    m_autoDepthStencil = new D3D9Surface(this, &depthDesc);
    m_initializer->InitTexture(m_autoDepthStencil->GetCommonTexture());
    SetDepthStencilSurface(m_autoDepthStencil.ptr());
  }

  SetRenderTarget(0, GetInternalSwapchain(0)->GetBackBuffer(0));

  // Force this if we end up binding the same RT to make scissor change go into effect.
  BindViewportAndScissor();

  return D3D_OK;
}
// Performs the first reset of the device: sets up the swapchain, then the
// device state, then flushes and waits for the CS thread. The first failing
// HRESULT is returned unchanged; on success the result is D3D_OK.
HRESULT D3D9DeviceEx::InitialReset(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode) {
  const HRESULT swapChainResult = ResetSwapChain(pPresentationParameters, pFullscreenDisplayMode);

  if (FAILED(swapChainResult))
    return swapChainResult;

  const HRESULT stateResult = ResetState(pPresentationParameters);

  if (FAILED(stateResult))
    return stateResult;

  // Submit the queued work and wait until the CS thread has processed it.
  Flush();
  SynchronizeCsThread();

  return D3D_OK;
}
}