#pragma once #include "d3d9_device_child.h" #include "d3d9_device.h" #include "d3d9_state.h" #include "../util/util_bit.h" namespace dxvk { enum class D3D9CapturedStateFlag : uint32_t { VertexDecl, Indices, RenderStates, SamplerStates, VertexBuffers, Textures, VertexShader, PixelShader, Viewport, ScissorRect, ClipPlanes, VsConstants, PsConstants, StreamFreq, Transforms, TextureStages, Material }; using D3D9CapturedStateFlags = Flags; struct D3D9StateCaptures { D3D9CapturedStateFlags flags; bit::bitset renderStates; bit::bitset samplers; std::array< bit::bitset, SamplerCount> samplerStates; bit::bitset vertexBuffers; bit::bitset textures; bit::bitset clipPlanes; bit::bitset streamFreq; bit::bitset transforms; bit::bitset textureStages; std::array< bit::bitset, caps::TextureStageCount> textureStageStates; struct { bit::bitset fConsts; bit::bitset iConsts; bit::bitset bConsts; } vsConsts; struct { bit::bitset fConsts; bit::bitset iConsts; bit::bitset bConsts; } psConsts; }; enum class D3D9StateBlockType :uint32_t { None, VertexState, PixelState, All }; inline D3D9StateBlockType ConvertStateBlockType(D3DSTATEBLOCKTYPE type) { switch (type) { case D3DSBT_PIXELSTATE: return D3D9StateBlockType::PixelState; case D3DSBT_VERTEXSTATE: return D3D9StateBlockType::VertexState; default: case D3DSBT_ALL: return D3D9StateBlockType::All; } } using D3D9StateBlockBase = D3D9DeviceChild; class D3D9StateBlock : public D3D9StateBlockBase { public: D3D9StateBlock(D3D9DeviceEx* pDevice, D3D9StateBlockType Type); HRESULT STDMETHODCALLTYPE QueryInterface( REFIID riid, void** ppvObject) final; HRESULT STDMETHODCALLTYPE Capture() final; HRESULT STDMETHODCALLTYPE Apply() final; HRESULT SetVertexDeclaration(D3D9VertexDecl* pDecl); HRESULT SetIndices(D3D9IndexBuffer* pIndexData); HRESULT SetRenderState(D3DRENDERSTATETYPE State, DWORD Value); HRESULT SetStateSamplerState( DWORD StateSampler, D3DSAMPLERSTATETYPE Type, DWORD Value); HRESULT SetStreamSource( UINT StreamNumber, D3D9VertexBuffer* pStreamData, UINT OffsetInBytes, UINT Stride); HRESULT SetStreamSourceFreq(UINT StreamNumber, UINT Setting); HRESULT SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture); HRESULT SetVertexShader(D3D9VertexShader* pShader); HRESULT SetPixelShader(D3D9PixelShader* pShader); HRESULT SetMaterial(const D3DMATERIAL9* pMaterial); HRESULT SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix); HRESULT SetTextureStageState( DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value); HRESULT MultiplyStateTransform(uint32_t idx, const D3DMATRIX* pMatrix); HRESULT SetViewport(const D3DVIEWPORT9* pViewport); HRESULT SetScissorRect(const RECT* pRect); HRESULT SetClipPlane(DWORD Index, const float* pPlane); HRESULT SetVertexShaderConstantF( UINT StartRegister, const float* pConstantData, UINT Vector4fCount); HRESULT SetVertexShaderConstantI( UINT StartRegister, const int* pConstantData, UINT Vector4iCount); HRESULT SetVertexShaderConstantB( UINT StartRegister, const BOOL* pConstantData, UINT BoolCount); HRESULT SetPixelShaderConstantF( UINT StartRegister, const float* pConstantData, UINT Vector4fCount); HRESULT SetPixelShaderConstantI( UINT StartRegister, const int* pConstantData, UINT Vector4iCount); HRESULT SetPixelShaderConstantB( UINT StartRegister, const BOOL* pConstantData, UINT BoolCount); enum class D3D9StateFunction { Apply, Capture }; template void ApplyOrCapture(Dst* dst, const Src* src) { if (m_captures.flags.test(D3D9CapturedStateFlag::VertexDecl)) dst->SetVertexDeclaration(src->vertexDecl.ptr()); if (m_captures.flags.test(D3D9CapturedStateFlag::StreamFreq)) { for (uint32_t i = 0; i < m_captures.streamFreq.dwordCount(); i++) { for (uint32_t stream = m_captures.streamFreq.dword(i); stream; stream &= stream - 1) { uint32_t idx = i * 32 + bit::tzcnt(stream); dst->SetStreamSourceFreq(idx, src->streamFreq[idx]); } } } if (m_captures.flags.test(D3D9CapturedStateFlag::Indices)) dst->SetIndices(src->indices.ptr()); if (m_captures.flags.test(D3D9CapturedStateFlag::RenderStates)) { for (uint32_t i = 0; i < m_captures.renderStates.dwordCount(); i++) { for (uint32_t rs = m_captures.renderStates.dword(i); rs; rs &= rs - 1) { uint32_t idx = i * 32 + bit::tzcnt(rs); dst->SetRenderState(D3DRENDERSTATETYPE(idx), src->renderStates[idx]); } } } if (m_captures.flags.test(D3D9CapturedStateFlag::SamplerStates)) { for (uint32_t i = 0; i < m_captures.samplers.dwordCount(); i++) { for (uint32_t sampler = m_captures.samplers.dword(i); sampler; sampler &= sampler - 1) { uint32_t samplerIdx = i * 32 + bit::tzcnt(sampler); for (uint32_t j = 0; j < m_captures.samplerStates[i].dwordCount(); j++) { for (uint32_t state = m_captures.samplerStates[i].dword(j); state; state &= state - 1) { uint32_t stateIdx = j * 32 + bit::tzcnt(state); dst->SetStateSamplerState(samplerIdx, D3DSAMPLERSTATETYPE(stateIdx), src->samplerStates[samplerIdx][stateIdx]); } } } } } if (m_captures.flags.test(D3D9CapturedStateFlag::VertexBuffers)) { for (uint32_t i = 0; i < m_captures.vertexBuffers.dwordCount(); i++) { for (uint32_t vb = m_captures.vertexBuffers.dword(i); vb; vb &= vb - 1) { uint32_t idx = i * 32 + bit::tzcnt(vb); const auto& vbo = src->vertexBuffers[idx]; dst->SetStreamSource( idx, vbo.vertexBuffer.ptr(), vbo.offset, vbo.stride); } } } if (m_captures.flags.test(D3D9CapturedStateFlag::Material)) dst->SetMaterial(&src->material); if (m_captures.flags.test(D3D9CapturedStateFlag::Textures)) { for (uint32_t i = 0; i < m_captures.textures.dwordCount(); i++) { for (uint32_t tex = m_captures.textures.dword(i); tex; tex &= tex - 1) { uint32_t idx = i * 32 + bit::tzcnt(tex); dst->SetStateTexture(idx, src->textures[idx]); } } } if (m_captures.flags.test(D3D9CapturedStateFlag::VertexShader)) dst->SetVertexShader(src->vertexShader.ptr()); if (m_captures.flags.test(D3D9CapturedStateFlag::PixelShader)) dst->SetPixelShader(src->pixelShader.ptr()); if (m_captures.flags.test(D3D9CapturedStateFlag::Transforms)) { for (uint32_t i = 0; i < m_captures.transforms.dwordCount(); i++) { for (uint32_t trans = m_captures.transforms.dword(i); trans; trans &= trans - 1) { uint32_t idx = i * 32 + bit::tzcnt(trans); dst->SetStateTransform(idx, reinterpret_cast(&src->transforms[idx])); } } } if (m_captures.flags.test(D3D9CapturedStateFlag::TextureStages)) { for (uint32_t i = 0; i < m_captures.textureStages.dwordCount(); i++) { for (uint32_t stage = m_captures.textureStages.dword(i); stage; stage &= stage - 1) { uint32_t stageIdx = i * 32 + bit::tzcnt(stage); for (uint32_t j = 0; j < m_captures.textureStageStates[i].dwordCount(); j++) { for (uint32_t state = m_captures.textureStageStates[i].dword(j); state; state &= state - 1) { uint32_t stateIdx = j * 32 + bit::tzcnt(state); dst->SetTextureStageState(stageIdx, (D3DTEXTURESTAGESTATETYPE)stateIdx, src->textureStages[stageIdx][stateIdx]); } } } } } if (m_captures.flags.test(D3D9CapturedStateFlag::Viewport)) dst->SetViewport(&src->viewport); if (m_captures.flags.test(D3D9CapturedStateFlag::ScissorRect)) dst->SetScissorRect(&src->scissorRect); if (m_captures.flags.test(D3D9CapturedStateFlag::ClipPlanes)) { for (uint32_t i = 0; i < m_captures.clipPlanes.dwordCount(); i++) { for (uint32_t plane = m_captures.clipPlanes.dword(i); plane; plane &= plane - 1) { uint32_t idx = i * 32 + bit::tzcnt(plane); dst->SetClipPlane(idx, src->clipPlanes[idx].coeff); } } } if (m_captures.flags.test(D3D9CapturedStateFlag::VsConstants)) { for (uint32_t i = 0; i < m_captures.vsConsts.fConsts.dwordCount(); i++) { for (uint32_t consts = m_captures.vsConsts.fConsts.dword(i); consts; consts &= consts - 1) { uint32_t idx = i * 32 + bit::tzcnt(consts); dst->SetVertexShaderConstantF(idx, (float*)&src->vsConsts.fConsts[idx], 1); } } for (uint32_t i = 0; i < m_captures.vsConsts.iConsts.dwordCount(); i++) { for (uint32_t consts = m_captures.vsConsts.iConsts.dword(i); consts; consts &= consts - 1) { uint32_t idx = i * 32 + bit::tzcnt(consts); dst->SetVertexShaderConstantI(idx, (int*)&src->vsConsts.iConsts[idx], 1); } } if (m_captures.vsConsts.bConsts.any()) { for (uint32_t i = 0; i < m_captures.vsConsts.bConsts.dwordCount(); i++) dst->SetVertexBoolBitfield(i, m_captures.vsConsts.bConsts.dword(i), src->vsConsts.bConsts[i]); } } if (m_captures.flags.test(D3D9CapturedStateFlag::PsConstants)) { for (uint32_t i = 0; i < m_captures.psConsts.fConsts.dwordCount(); i++) { for (uint32_t consts = m_captures.psConsts.fConsts.dword(i); consts; consts &= consts - 1) { uint32_t idx = i * 32 + bit::tzcnt(consts); dst->SetPixelShaderConstantF(idx, (float*)&src->psConsts.fConsts[idx], 1); } } for (uint32_t i = 0; i < m_captures.psConsts.iConsts.dwordCount(); i++) { for (uint32_t consts = m_captures.psConsts.iConsts.dword(i); consts; consts &= consts - 1) { uint32_t idx = i * 32 + bit::tzcnt(consts); dst->SetPixelShaderConstantI(idx, (int*)&src->psConsts.iConsts[idx], 1); } } if (m_captures.psConsts.bConsts.any()) { for (uint32_t i = 0; i < m_captures.psConsts.bConsts.dwordCount(); i++) dst->SetPixelBoolBitfield(i, m_captures.psConsts.bConsts.dword(i), src->psConsts.bConsts[i]); } } } template void ApplyOrCapture() { if constexpr (Func == D3D9StateFunction::Apply) ApplyOrCapture(m_parent, &m_state); else if constexpr (Func == D3D9StateFunction::Capture) ApplyOrCapture(this, m_deviceState); } template < DxsoProgramType ProgramType, D3D9ConstantType ConstantType, typename T> HRESULT SetShaderConstants( UINT StartRegister, const T* pConstantData, UINT Count) { auto SetHelper = [&](auto& setCaptures) { if constexpr (ProgramType == DxsoProgramTypes::VertexShader) m_captures.flags.set(D3D9CapturedStateFlag::VsConstants); else m_captures.flags.set(D3D9CapturedStateFlag::PsConstants); for (uint32_t i = 0; i < Count; i++) { uint32_t reg = StartRegister + i; if constexpr (ConstantType == D3D9ConstantType::Float) setCaptures.fConsts.set(reg, true); else if constexpr (ConstantType == D3D9ConstantType::Int) setCaptures.iConsts.set(reg, true); else if constexpr (ConstantType == D3D9ConstantType::Bool) setCaptures.bConsts.set(reg, true); } UpdateStateConstants< ProgramType, ConstantType, T>( &m_state, StartRegister, pConstantData, Count, false); return D3D_OK; }; return ProgramType == DxsoProgramTypes::VertexShader ? SetHelper(m_captures.vsConsts) : SetHelper(m_captures.psConsts); } HRESULT SetVertexBoolBitfield(uint32_t idx, uint32_t mask, uint32_t bits); HRESULT SetPixelBoolBitfield (uint32_t idx, uint32_t mask, uint32_t bits); inline bool IsApplying() { return m_applying; } private: void CapturePixelRenderStates(); void CapturePixelSamplerStates(); void CapturePixelShaderStates(); void CaptureVertexRenderStates(); void CaptureVertexSamplerStates(); void CaptureVertexShaderStates(); void CaptureType(D3D9StateBlockType State); D3D9CapturableState m_state; D3D9StateCaptures m_captures; D3D9CapturableState* m_deviceState; bool m_applying = false; }; }