1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-27 13:54:16 +01:00

[d3d9] Defer managed texture uploads until PrepareDraw and when needed

This also caches shader masks used for hazard tracking.
This commit is contained in:
Joshua Ashton 2020-01-11 04:12:59 +00:00 committed by Joshie
parent 0ea510eb9b
commit ae68e3a5bc
5 changed files with 105 additions and 27 deletions

View File

@ -357,6 +357,15 @@ namespace dxvk {
UINT Lod, UINT Lod,
VkImageUsageFlags UsageFlags, VkImageUsageFlags UsageFlags,
bool Srgb); bool Srgb);
// Per-subresource upload tracking.
//
// m_needsUpload: one bit per subresource; this class only sets/clears the
//                bits, callers decide what they mean (presumably "mapped
//                data not yet copied to the image" - confirm at call sites).
// m_uploading:   one bit per subresource; presumably marks a copy that has
//                been recorded but may still be in flight - confirm at
//                call sites.

// Returns a mutable reference to the needs-upload bitset so that callers
// can iterate over its dwords directly.
D3D9SubresourceBitset& GetUploadBitmask() { return m_needsUpload; }
// Sets or clears the uploading bit of a single subresource.
void SetUploading(UINT Subresource, bool uploading) { m_uploading.set(Subresource, uploading); }
// Clears the uploading bit of every subresource.
void ClearUploading() { m_uploading.clearAll(); }
// Returns the uploading bit of a single subresource.
bool GetUploading(UINT Subresource) const { return m_uploading.get(Subresource); }
// Sets or clears the needs-upload bit of a single subresource.
void SetNeedsUpload(UINT Subresource, bool upload) { m_needsUpload.set(Subresource, upload); }
// Returns true if at least one subresource has its needs-upload bit set.
bool NeedsAnyUpload() { return m_needsUpload.any(); }
// Clears the needs-upload bit of every subresource.
void ClearNeedsUpload() { return m_needsUpload.clearAll(); }
private: private:
@ -392,6 +401,9 @@ namespace dxvk {
D3D9SubresourceBitset m_dirty = { }; D3D9SubresourceBitset m_dirty = { };
D3D9SubresourceBitset m_uploading = { };
D3D9SubresourceBitset m_needsUpload = { };
/** /**
* \brief Mip level * \brief Mip level
* \returns Size of packed mip level in bytes * \returns Size of packed mip level in bytes

View File

@ -2662,7 +2662,11 @@ namespace dxvk {
BindShader<DxsoProgramTypes::VertexShader>( BindShader<DxsoProgramTypes::VertexShader>(
GetCommonShader(shader), GetCommonShader(shader),
GetVertexShaderPermutation()); GetVertexShaderPermutation());
m_vsShaderMasks = newShader->GetShaderMask();
} }
else
m_vsShaderMasks = D3D9ShaderMasks();
m_flags.set(D3D9DeviceFlag::DirtyInputLayout); m_flags.set(D3D9DeviceFlag::DirtyInputLayout);
@ -2987,6 +2991,15 @@ namespace dxvk {
BindShader<DxsoProgramTypes::PixelShader>( BindShader<DxsoProgramTypes::PixelShader>(
GetCommonShader(shader), GetCommonShader(shader),
GetPixelShaderPermutation()); GetPixelShaderPermutation());
m_psShaderMasks = newShader->GetShaderMask();
}
else {
// TODO: What fixed function textures are in use?
// Currently we are marking all 8 of them as in use here.
// The RT output is always 0 for fixed function.
m_psShaderMasks = FixedFunctionMask;
} }
UpdateActiveHazards(); UpdateActiveHazards();
@ -3498,9 +3511,7 @@ namespace dxvk {
BindTexture(StateSampler); BindTexture(StateSampler);
// We only care about PS samplers UpdateActiveTextures(StateSampler);
if (likely(StateSampler <= caps::MaxSamplers))
UpdateActiveRTTextures(StateSampler);
return D3D_OK; return D3D_OK;
} }
@ -3902,14 +3913,17 @@ namespace dxvk {
// calling app promises not to overwrite data that is in use // calling app promises not to overwrite data that is in use
// or is reading. Remember! This will only trigger for MANAGED resources // or is reading. Remember! This will only trigger for MANAGED resources
// that cannot get affected by GPU, therefore readonly is A-OK for NOT waiting. // that cannot get affected by GPU, therefore readonly is A-OK for NOT waiting.
const bool uploading = pResource->GetUploading(Subresource);
const bool readOnly = Flags & D3DLOCK_READONLY; const bool readOnly = Flags & D3DLOCK_READONLY;
const bool skipWait = (readOnly && managed) || scratch || (readOnly && systemmem && !dirty); const bool skipWait = (managed && !uploading) || (readOnly && managed) || scratch || (readOnly && systemmem && !dirty);
if (alloced) if (alloced)
std::memset(physSlice.mapPtr, 0, physSlice.length); std::memset(physSlice.mapPtr, 0, physSlice.length);
else if (!skipWait) { else if (!skipWait) {
if (!WaitForResource(mappedBuffer, Flags)) if (!WaitForResource(mappedBuffer, Flags))
return D3DERR_WASSTILLDRAWING; return D3DERR_WASSTILLDRAWING;
pResource->ClearUploading();
} }
} }
else { else {
@ -4036,7 +4050,22 @@ namespace dxvk {
// Do we have a pending copy? // Do we have a pending copy?
if (!pResource->GetReadOnlyLocked(Subresource)) { if (!pResource->GetReadOnlyLocked(Subresource)) {
// Only flush buffer -> image if we actually have an image // Only flush buffer -> image if we actually have an image
if (pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED) if (pResource->IsManaged()) {
pResource->SetNeedsUpload(Subresource, true);
for (uint32_t tex = m_activeTextures; tex; tex &= tex - 1) {
// Guaranteed to not be nullptr...
const uint32_t i = bit::tzcnt(tex);
auto texInfo = GetCommonTexture(m_state.textures[i]);
if (texInfo == pResource) {
m_activeTexturesToUpload |= 1 << i;
// We can early out here, no need to add another index for this.
break;
}
}
}
else if (pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED)
this->FlushImage(pResource, Subresource); this->FlushImage(pResource, Subresource);
} }
@ -4075,6 +4104,8 @@ namespace dxvk {
auto convertFormat = pResource->GetFormatMapping().ConversionFormatInfo; auto convertFormat = pResource->GetFormatMapping().ConversionFormatInfo;
pResource->SetUploading(Subresource, true);
if (likely(convertFormat.FormatType == D3D9ConversionFormat_None)) { if (likely(convertFormat.FormatType == D3D9ConversionFormat_None)) {
EmitCs([ EmitCs([
cSrcBuffer = copyBuffer, cSrcBuffer = copyBuffer,
@ -4647,20 +4678,6 @@ namespace dxvk {
} }
inline D3D9ShaderMasks D3D9DeviceEx::GetShaderMasks() {
const auto* shader = GetCommonShader(m_state.pixelShader);
if (likely(shader != nullptr))
return shader->GetShaderMask();
// TODO: What fixed function textures are in use?
// Currently we are making all 8 of them as in use here.
// The RT output is always 0 for fixed function.
return D3D9ShaderMasks{ 0b1111111, 0b1 };
}
inline void D3D9DeviceEx::UpdateActiveRTs(uint32_t index) { inline void D3D9DeviceEx::UpdateActiveRTs(uint32_t index) {
const uint32_t bit = 1 << index; const uint32_t bit = 1 << index;
@ -4675,21 +4692,30 @@ namespace dxvk {
} }
inline void D3D9DeviceEx::UpdateActiveRTTextures(uint32_t index) { inline void D3D9DeviceEx::UpdateActiveTextures(uint32_t index) {
const uint32_t bit = 1 << index; const uint32_t bit = 1 << index;
m_activeRTTextures &= ~bit; m_activeRTTextures &= ~bit;
m_activeTextures &= ~bit;
m_activeTexturesToUpload &= ~bit;
auto tex = GetCommonTexture(m_state.textures[index]); auto tex = GetCommonTexture(m_state.textures[index]);
if (tex != nullptr && tex->IsRenderTarget()) if (tex != nullptr) {
m_activeRTTextures |= bit; m_activeTextures |= bit;
if (unlikely(tex->IsRenderTarget()))
m_activeRTTextures |= bit;
if (unlikely(tex->NeedsAnyUpload()))
m_activeTexturesToUpload |= bit;
}
UpdateActiveHazards(); UpdateActiveHazards();
} }
inline void D3D9DeviceEx::UpdateActiveHazards() { inline void D3D9DeviceEx::UpdateActiveHazards() {
auto masks = GetShaderMasks(); auto masks = m_psShaderMasks;
masks.rtMask &= m_activeRTs; masks.rtMask &= m_activeRTs;
masks.samplerMask &= m_activeRTTextures; masks.samplerMask &= m_activeRTTextures;
@ -4727,6 +4753,26 @@ namespace dxvk {
} }
/**
 * \brief Uploads pending subresources of bound managed textures
 *
 * For every texture slot set in \c mask, flushes each subresource
 * that is flagged in the texture's upload bitmask to its image and
 * then clears the texture's pending-upload state.
 *
 * \param [in] mask Bit mask of texture slots to process. Every set
 *                  bit must refer to a slot with a texture bound.
 */
void D3D9DeviceEx::UploadManagedTextures(uint32_t mask) {
  for (uint32_t tex = mask; tex; tex &= tex - 1) {
    // Guaranteed to not be nullptr: callers pass a subset of
    // m_activeTexturesToUpload, which only has bits for bound slots.
    auto texInfo = GetCommonTexture(m_state.textures[bit::tzcnt(tex)]);

    // Walk the set bits of the upload bitmask one dword at a time.
    for (uint32_t i = 0; i < texInfo->GetUploadBitmask().dwordCount(); i++) {
      for (uint32_t subresources = texInfo->GetUploadBitmask().dword(i); subresources; subresources &= subresources - 1) {
        uint32_t subresource = i * 32 + bit::tzcnt(subresources);

        this->FlushImage(texInfo, subresource);
      }
    }

    texInfo->ClearNeedsUpload();
  }

  // Only retire the slots that were actually processed. The caller may
  // pass a mask filtered by the samplers the current shaders use; slots
  // outside that mask still hold pending data and must keep their bit so
  // they get uploaded once a shader that samples them is bound.
  m_activeTexturesToUpload &= ~mask;
}
template <bool Points> template <bool Points>
void D3D9DeviceEx::UpdatePointMode() { void D3D9DeviceEx::UpdatePointMode() {
if constexpr (!Points) { if constexpr (!Points) {
@ -5382,6 +5428,11 @@ namespace dxvk {
FlushBuffer(vbo); FlushBuffer(vbo);
} }
uint32_t texturesToUpload = m_activeTexturesToUpload;
texturesToUpload &= m_psShaderMasks.samplerMask | m_vsShaderMasks.samplerMask;
if (unlikely(texturesToUpload != 0))
UploadManagedTextures(texturesToUpload);
auto* ibo = GetCommonBuffer(m_state.indices); auto* ibo = GetCommonBuffer(m_state.indices);
if (ibo != nullptr && ibo->NeedsUpload()) if (ibo != nullptr && ibo->NeedsUpload())
FlushBuffer(ibo); FlushBuffer(ibo);

View File

@ -734,16 +734,16 @@ namespace dxvk {
void Flush(); void Flush();
D3D9ShaderMasks GetShaderMasks();
void UpdateActiveRTs(uint32_t index); void UpdateActiveRTs(uint32_t index);
void UpdateActiveRTTextures(uint32_t index); void UpdateActiveTextures(uint32_t index);
void UpdateActiveHazards(); void UpdateActiveHazards();
void MarkRenderHazards(); void MarkRenderHazards();
void UploadManagedTextures(uint32_t mask);
template <bool Points> template <bool Points>
void UpdatePointMode(); void UpdatePointMode();
@ -1024,6 +1024,11 @@ namespace dxvk {
uint32_t m_activeRTTextures = 0; uint32_t m_activeRTTextures = 0;
uint32_t m_activeHazards = 0; uint32_t m_activeHazards = 0;
uint32_t m_alphaSwizzleRTs = 0; uint32_t m_alphaSwizzleRTs = 0;
uint32_t m_activeTextures = 0;
uint32_t m_activeTexturesToUpload = 0;
D3D9ShaderMasks m_vsShaderMasks = D3D9ShaderMasks();
D3D9ShaderMasks m_psShaderMasks = FixedFunctionMask;
D3D9ViewportInfo m_viewportInfo; D3D9ViewportInfo m_viewportInfo;

View File

@ -61,6 +61,13 @@ namespace dxvk {
m_shaders = pModule->compile(*pDxsoModuleInfo, name, AnalysisInfo, constantLayout); m_shaders = pModule->compile(*pDxsoModuleInfo, name, AnalysisInfo, constantLayout);
m_isgn = pModule->isgn(); m_isgn = pModule->isgn();
m_usedSamplers = pModule->usedSamplers(); m_usedSamplers = pModule->usedSamplers();
// Shift up these sampler bits so we can just
// do an or per-draw in the device.
// We shift by 17 because 16 ps samplers + 1 dmap (tess)
if (ShaderStage == VK_SHADER_STAGE_VERTEX_BIT)
m_usedSamplers <<= 17;
m_usedRTs = pModule->usedRTs(); m_usedRTs = pModule->usedRTs();
m_info = pModule->info(); m_info = pModule->info();

View File

@ -18,6 +18,9 @@ namespace dxvk {
uint32_t rtMask; uint32_t rtMask;
}; };
// Shader masks assumed while no pixel shader is bound (fixed function).
// TODO: What fixed function textures are actually in use?
// For now all 8 texture stages (bits 0-7) are marked as in use.
// The RT output is always 0 for fixed function.
static constexpr D3D9ShaderMasks FixedFunctionMask =
  { 0b11111111, 0b1 };
struct D3D9MipFilter { struct D3D9MipFilter {
bool MipsEnabled; bool MipsEnabled;
VkSamplerMipmapMode MipFilter; VkSamplerMipmapMode MipFilter;