From 15d5b69d2c9d9144e6c9c2d64e1c4cd749c90826 Mon Sep 17 00:00:00 2001
From: Philip Rebohle
Date: Sat, 5 Oct 2024 11:34:59 +0200
Subject: [PATCH] [dxvk] Add new format conversion shaders

Intended to replace the old pack/unpack shaders.
---
 src/dxvk/meson.build                          |   7 +
 src/dxvk/shaders/dxvk_buffer_to_image_d.frag  |  49 +++++
 .../dxvk_buffer_to_image_ds_export.frag       |  60 ++++++
 .../dxvk_buffer_to_image_s_discard.frag       |  49 +++++
 src/dxvk/shaders/dxvk_formats.glsl            | 187 ++++++++++++++++++
 src/dxvk/shaders/dxvk_image_to_buffer_ds.comp |  65 ++++++
 src/dxvk/shaders/dxvk_image_to_buffer_f.comp  |  55 ++++++
 7 files changed, 472 insertions(+)
 create mode 100644 src/dxvk/shaders/dxvk_buffer_to_image_d.frag
 create mode 100644 src/dxvk/shaders/dxvk_buffer_to_image_ds_export.frag
 create mode 100644 src/dxvk/shaders/dxvk_buffer_to_image_s_discard.frag
 create mode 100644 src/dxvk/shaders/dxvk_formats.glsl
 create mode 100644 src/dxvk/shaders/dxvk_image_to_buffer_ds.comp
 create mode 100644 src/dxvk/shaders/dxvk_image_to_buffer_f.comp

diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build
index a696fff60..edf8ecafe 100644
--- a/src/dxvk/meson.build
+++ b/src/dxvk/meson.build
@@ -3,6 +3,10 @@ dxvk_shaders = files([
   'shaders/dxvk_blit_frag_2d.frag',
   'shaders/dxvk_blit_frag_3d.frag',
 
+  'shaders/dxvk_buffer_to_image_d.frag',
+  'shaders/dxvk_buffer_to_image_ds_export.frag',
+  'shaders/dxvk_buffer_to_image_s_discard.frag',
+
   'shaders/dxvk_clear_buffer_u.comp',
   'shaders/dxvk_clear_buffer_f.comp',
   'shaders/dxvk_clear_image1d_u.comp',
@@ -30,6 +34,9 @@ dxvk_shaders = files([
   'shaders/dxvk_fullscreen_vert.vert',
   'shaders/dxvk_fullscreen_layer_vert.vert',
 
+  'shaders/dxvk_image_to_buffer_ds.comp',
+  'shaders/dxvk_image_to_buffer_f.comp',
+
   'shaders/dxvk_pack_d24s8.comp',
   'shaders/dxvk_pack_d32s8.comp',
 
diff --git a/src/dxvk/shaders/dxvk_buffer_to_image_d.frag b/src/dxvk/shaders/dxvk_buffer_to_image_d.frag
new file mode 100644
index 000000000..136d22af4
--- /dev/null
+++ b/src/dxvk/shaders/dxvk_buffer_to_image_d.frag
@@ -0,0 +1,49 @@
+#version 460
+// Decodes one packed depth texel from the u_data texel buffer and writes it to gl_FragDepth.
+#extension GL_GOOGLE_include_directive : enable
+
+#include "dxvk_formats.glsl"
+
+layout(constant_id = 0) const uint src_format = VK_FORMAT_UNDEFINED; // selects the decode path in main()
+
+layout(binding = 0) uniform usamplerBuffer u_data;
+
+layout(push_constant)
+uniform push_data_t {
+  uvec3 image_offset;
+  uint buffer_offset;
+  uvec3 image_extent;
+  uint buffer_image_width;
+  uint buffer_image_height;
+  uint stencil_bit_index;
+};
+
+void main() {
+  uvec2 location = uvec2(gl_FragCoord.xy) - image_offset.xy; // texel position relative to image_offset
+
+  int offset = int(buffer_offset + location.x +
+    buffer_image_width * (location.y + buffer_image_height * gl_Layer)); // linear texel index: x, then rows, then layers
+
+  switch (src_format) {
+    case VK_FORMAT_D16_UNORM:
+    case VK_FORMAT_D16_UNORM_S8_UINT: {
+      uint data = texelFetch(u_data, offset).x;
+      gl_FragDepth = float(data & 0xffffu) / float(0xffffu); // low 16 bits, normalized to [0, 1]
+    } break;
+
+    case VK_FORMAT_D24_UNORM_S8_UINT:
+    case VK_FORMAT_X8_D24_UNORM_PACK32: {
+      uint data = texelFetch(u_data, offset).x;
+      gl_FragDepth = float(data & 0xffffffu) / float(0xffffffu); // low 24 bits, normalized to [0, 1]
+    } break;
+
+    case VK_FORMAT_D32_SFLOAT:
+    case VK_FORMAT_D32_SFLOAT_S8_UINT: {
+      uint data = texelFetch(u_data, offset).x;
+      gl_FragDepth = uintBitsToFloat(data); // fetched bits are reinterpreted, not converted
+    } break;
+
+    default: // any other src_format writes a depth of zero
+      gl_FragDepth = 0.0f;
+  }
+}
diff --git a/src/dxvk/shaders/dxvk_buffer_to_image_ds_export.frag b/src/dxvk/shaders/dxvk_buffer_to_image_ds_export.frag
new file mode 100644
index 000000000..1569b092c
--- /dev/null
+++ b/src/dxvk/shaders/dxvk_buffer_to_image_ds_export.frag
@@ -0,0 +1,60 @@
+#version 460
+// Decodes one packed depth-stencil texel from u_data and exports gl_FragDepth plus gl_FragStencilRefARB.
+#extension GL_GOOGLE_include_directive : enable
+#extension GL_ARB_shader_stencil_export : enable
+
+#include "dxvk_formats.glsl"
+
+layout(constant_id = 0) const uint src_format = VK_FORMAT_UNDEFINED; // selects the decode path in main()
+
+layout(binding = 0) uniform usamplerBuffer u_data;
+
+layout(push_constant)
+uniform push_data_t {
+  uvec3 image_offset;
+  uint buffer_offset;
+  uvec3 image_extent;
+  uint buffer_image_width;
+  uint buffer_image_height;
+  uint stencil_bit_index;
+};
+
+void main() {
+  uvec2 location = uvec2(gl_FragCoord.xy) - image_offset.xy; // texel position relative to image_offset
+
+  int offset = int(buffer_offset + location.x +
+    buffer_image_width * (location.y + buffer_image_height * gl_Layer)); // linear texel index: x, then rows, then layers
+
+  switch (src_format) {
+    case VK_FORMAT_D16_UNORM:
+    case VK_FORMAT_D16_UNORM_S8_UINT: {
+      uvec2 data = texelFetch(u_data, offset).xy;
+      gl_FragDepth = float(data.x & 0xffffu) / float(0xffffu); // depth comes from .x explicitly, stencil from .y
+      gl_FragStencilRefARB = int(data.y & 0xffu);
+    } break;
+
+    case VK_FORMAT_D24_UNORM_S8_UINT:
+    case VK_FORMAT_X8_D24_UNORM_PACK32: {
+      uint data = texelFetch(u_data, offset).x;
+      gl_FragDepth = float(data & 0xffffffu) / float(0xffffffu); // low 24 bits, normalized to [0, 1]
+      gl_FragStencilRefARB = int(data >> 24); // top 8 bits of the same word
+    } break;
+
+    case VK_FORMAT_D32_SFLOAT:
+    case VK_FORMAT_D32_SFLOAT_S8_UINT: {
+      uvec2 data = texelFetch(u_data, offset).xy;
+      gl_FragDepth = uintBitsToFloat(data.x); // fetched bits are reinterpreted, not converted
+      gl_FragStencilRefARB = int(data.y & 0xffu);
+    } break;
+
+    case VK_FORMAT_S8_UINT: {
+      uint data = texelFetch(u_data, offset).x;
+      gl_FragDepth = 0.0f;
+      gl_FragStencilRefARB = int(data & 0xffu);
+    } break;
+
+    default: // any other src_format exports zero depth and zero stencil
+      gl_FragDepth = 0.0f;
+      gl_FragStencilRefARB = 0;
+  }
+}
diff --git a/src/dxvk/shaders/dxvk_buffer_to_image_s_discard.frag b/src/dxvk/shaders/dxvk_buffer_to_image_s_discard.frag
new file mode 100644
index 000000000..8211dc46c
--- /dev/null
+++ b/src/dxvk/shaders/dxvk_buffer_to_image_s_discard.frag
@@ -0,0 +1,49 @@
+#version 460
+// Discards the fragment unless bit stencil_bit_index is set in the stencil value read from u_data.
+#extension GL_GOOGLE_include_directive : enable
+
+#include "dxvk_formats.glsl"
+
+layout(constant_id = 0) const uint src_format = VK_FORMAT_UNDEFINED; // selects where the stencil byte is read from
+
+layout(binding = 0) uniform usamplerBuffer u_data;
+
+layout(push_constant)
+uniform push_data_t {
+  uvec3 image_offset;
+  uint buffer_offset;
+  uvec3 image_extent;
+  uint buffer_image_width;
+  uint buffer_image_height;
+  uint stencil_bit_index;
+};
+
+void main() {
+  uvec2 location = uvec2(gl_FragCoord.xy) - image_offset.xy; // texel position relative to image_offset
+
+  int offset = int(buffer_offset + location.x +
+    buffer_image_width * (location.y + buffer_image_height * gl_Layer)); // linear texel index: x, then rows, then layers
+
+  uint stencil = 0u; // stays zero for formats the switch does not handle, so those fragments are discarded
+
+  switch (src_format) {
+    case VK_FORMAT_D24_UNORM_S8_UINT: {
+      uint data = texelFetch(u_data, offset).x;
+      stencil = data >> 24; // top 8 bits of the packed word
+    } break;
+
+    case VK_FORMAT_D16_UNORM_S8_UINT:
+    case VK_FORMAT_D32_SFLOAT_S8_UINT: {
+      uint data = texelFetch(u_data, offset).y; // stencil is read from the second component
+      stencil = data & 0xffu;
+    } break;
+
+    case VK_FORMAT_S8_UINT: {
+      uint data = texelFetch(u_data, offset).x;
+      stencil = data & 0xffu;
+    } break;
+  }
+
+  if ((stencil & (1u << stencil_bit_index)) == 0u)
+    discard;
+}
diff --git a/src/dxvk/shaders/dxvk_formats.glsl b/src/dxvk/shaders/dxvk_formats.glsl
new file mode 100644
index 000000000..f96514e79
--- /dev/null
+++ b/src/dxvk/shaders/dxvk_formats.glsl
@@ -0,0 +1,187 @@
+#define VK_FORMAT_UNDEFINED (0) // numeric values in this file follow the Vulkan VkFormat enumeration
+#define VK_FORMAT_R4G4_UNORM_PACK8 (1)
+#define VK_FORMAT_R4G4B4A4_UNORM_PACK16 (2)
+#define VK_FORMAT_B4G4R4A4_UNORM_PACK16 (3)
+#define VK_FORMAT_R5G6B5_UNORM_PACK16 (4)
+#define VK_FORMAT_B5G6R5_UNORM_PACK16 (5)
+#define VK_FORMAT_R5G5B5A1_UNORM_PACK16 (6)
+#define VK_FORMAT_B5G5R5A1_UNORM_PACK16 (7)
+#define VK_FORMAT_A1R5G5B5_UNORM_PACK16 (8)
+#define VK_FORMAT_R8_UNORM (9)
+#define VK_FORMAT_R8_SNORM (10)
+#define VK_FORMAT_R8_USCALED (11)
+#define VK_FORMAT_R8_SSCALED (12)
+#define VK_FORMAT_R8_UINT (13)
+#define VK_FORMAT_R8_SINT (14)
+#define VK_FORMAT_R8_SRGB (15)
+#define VK_FORMAT_R8G8_UNORM (16)
+#define VK_FORMAT_R8G8_SNORM (17)
+#define VK_FORMAT_R8G8_USCALED (18)
+#define VK_FORMAT_R8G8_SSCALED (19)
+#define VK_FORMAT_R8G8_UINT (20)
+#define VK_FORMAT_R8G8_SINT (21)
+#define VK_FORMAT_R8G8_SRGB (22)
+#define VK_FORMAT_R8G8B8_UNORM (23)
+#define VK_FORMAT_R8G8B8_SNORM (24)
+#define VK_FORMAT_R8G8B8_USCALED (25)
+#define VK_FORMAT_R8G8B8_SSCALED (26)
+#define VK_FORMAT_R8G8B8_UINT (27)
+#define VK_FORMAT_R8G8B8_SINT (28)
+#define VK_FORMAT_R8G8B8_SRGB (29)
+#define VK_FORMAT_B8G8R8_UNORM (30)
+#define VK_FORMAT_B8G8R8_SNORM (31)
+#define VK_FORMAT_B8G8R8_USCALED (32)
+#define VK_FORMAT_B8G8R8_SSCALED (33)
+#define VK_FORMAT_B8G8R8_UINT (34)
+#define VK_FORMAT_B8G8R8_SINT (35)
+#define VK_FORMAT_B8G8R8_SRGB (36)
+#define VK_FORMAT_R8G8B8A8_UNORM (37)
+#define VK_FORMAT_R8G8B8A8_SNORM (38)
+#define VK_FORMAT_R8G8B8A8_USCALED (39)
+#define VK_FORMAT_R8G8B8A8_SSCALED (40)
+#define VK_FORMAT_R8G8B8A8_UINT (41)
+#define VK_FORMAT_R8G8B8A8_SINT (42)
+#define VK_FORMAT_R8G8B8A8_SRGB (43)
+#define VK_FORMAT_B8G8R8A8_UNORM (44)
+#define VK_FORMAT_B8G8R8A8_SNORM (45)
+#define VK_FORMAT_B8G8R8A8_USCALED (46)
+#define VK_FORMAT_B8G8R8A8_SSCALED (47)
+#define VK_FORMAT_B8G8R8A8_UINT (48)
+#define VK_FORMAT_B8G8R8A8_SINT (49)
+#define VK_FORMAT_B8G8R8A8_SRGB (50)
+#define VK_FORMAT_A8B8G8R8_UNORM_PACK32 (51)
+#define VK_FORMAT_A8B8G8R8_SNORM_PACK32 (52)
+#define VK_FORMAT_A8B8G8R8_USCALED_PACK32 (53)
+#define VK_FORMAT_A8B8G8R8_SSCALED_PACK32 (54)
+#define VK_FORMAT_A8B8G8R8_UINT_PACK32 (55)
+#define VK_FORMAT_A8B8G8R8_SINT_PACK32 (56)
+#define VK_FORMAT_A8B8G8R8_SRGB_PACK32 (57)
+#define VK_FORMAT_A2R10G10B10_UNORM_PACK32 (58)
+#define VK_FORMAT_A2R10G10B10_SNORM_PACK32 (59)
+#define VK_FORMAT_A2R10G10B10_USCALED_PACK32 (60)
+#define VK_FORMAT_A2R10G10B10_SSCALED_PACK32 (61)
+#define VK_FORMAT_A2R10G10B10_UINT_PACK32 (62)
+#define VK_FORMAT_A2R10G10B10_SINT_PACK32 (63)
+#define VK_FORMAT_A2B10G10R10_UNORM_PACK32 (64)
+#define VK_FORMAT_A2B10G10R10_SNORM_PACK32 (65)
+#define VK_FORMAT_A2B10G10R10_USCALED_PACK32 (66)
+#define VK_FORMAT_A2B10G10R10_SSCALED_PACK32 (67)
+#define VK_FORMAT_A2B10G10R10_UINT_PACK32 (68)
+#define VK_FORMAT_A2B10G10R10_SINT_PACK32 (69)
+#define VK_FORMAT_R16_UNORM (70)
+#define VK_FORMAT_R16_SNORM (71)
+#define VK_FORMAT_R16_USCALED (72)
+#define VK_FORMAT_R16_SSCALED (73)
+#define VK_FORMAT_R16_UINT (74)
+#define VK_FORMAT_R16_SINT (75)
+#define VK_FORMAT_R16_SFLOAT (76)
+#define VK_FORMAT_R16G16_UNORM (77)
+#define VK_FORMAT_R16G16_SNORM (78)
+#define VK_FORMAT_R16G16_USCALED (79)
+#define VK_FORMAT_R16G16_SSCALED (80)
+#define VK_FORMAT_R16G16_UINT (81)
+#define VK_FORMAT_R16G16_SINT (82)
+#define VK_FORMAT_R16G16_SFLOAT (83)
+#define VK_FORMAT_R16G16B16_UNORM (84)
+#define VK_FORMAT_R16G16B16_SNORM (85)
+#define VK_FORMAT_R16G16B16_USCALED (86)
+#define VK_FORMAT_R16G16B16_SSCALED (87)
+#define VK_FORMAT_R16G16B16_UINT (88)
+#define VK_FORMAT_R16G16B16_SINT (89)
+#define VK_FORMAT_R16G16B16_SFLOAT (90)
+#define VK_FORMAT_R16G16B16A16_UNORM (91)
+#define VK_FORMAT_R16G16B16A16_SNORM (92)
+#define VK_FORMAT_R16G16B16A16_USCALED (93)
+#define VK_FORMAT_R16G16B16A16_SSCALED (94)
+#define VK_FORMAT_R16G16B16A16_UINT (95)
+#define VK_FORMAT_R16G16B16A16_SINT (96)
+#define VK_FORMAT_R16G16B16A16_SFLOAT (97)
+#define VK_FORMAT_R32_UINT (98)
+#define VK_FORMAT_R32_SINT (99)
+#define VK_FORMAT_R32_SFLOAT (100)
+#define VK_FORMAT_R32G32_UINT (101)
+#define VK_FORMAT_R32G32_SINT (102)
+#define VK_FORMAT_R32G32_SFLOAT (103)
+#define VK_FORMAT_R32G32B32_UINT (104)
+#define VK_FORMAT_R32G32B32_SINT (105)
+#define VK_FORMAT_R32G32B32_SFLOAT (106)
+#define VK_FORMAT_R32G32B32A32_UINT (107)
+#define VK_FORMAT_R32G32B32A32_SINT (108)
+#define VK_FORMAT_R32G32B32A32_SFLOAT (109)
+#define VK_FORMAT_R64_UINT (110)
+#define VK_FORMAT_R64_SINT (111)
+#define VK_FORMAT_R64_SFLOAT (112)
+#define VK_FORMAT_R64G64_UINT (113)
+#define VK_FORMAT_R64G64_SINT (114)
+#define VK_FORMAT_R64G64_SFLOAT (115)
+#define VK_FORMAT_R64G64B64_UINT (116)
+#define VK_FORMAT_R64G64B64_SINT (117)
+#define VK_FORMAT_R64G64B64_SFLOAT (118)
+#define VK_FORMAT_R64G64B64A64_UINT (119)
+#define VK_FORMAT_R64G64B64A64_SINT (120)
+#define VK_FORMAT_R64G64B64A64_SFLOAT (121)
+#define VK_FORMAT_B10G11R11_UFLOAT_PACK32 (122)
+#define VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 (123)
+#define VK_FORMAT_D16_UNORM (124)
+#define VK_FORMAT_X8_D24_UNORM_PACK32 (125)
+#define VK_FORMAT_D32_SFLOAT (126)
+#define VK_FORMAT_S8_UINT (127)
+#define VK_FORMAT_D16_UNORM_S8_UINT (128)
+#define VK_FORMAT_D24_UNORM_S8_UINT (129)
+#define VK_FORMAT_D32_SFLOAT_S8_UINT (130)
+#define VK_FORMAT_BC1_RGB_UNORM_BLOCK (131)
+#define VK_FORMAT_BC1_RGB_SRGB_BLOCK (132)
+#define VK_FORMAT_BC1_RGBA_UNORM_BLOCK (133)
+#define VK_FORMAT_BC1_RGBA_SRGB_BLOCK (134)
+#define VK_FORMAT_BC2_UNORM_BLOCK (135)
+#define VK_FORMAT_BC2_SRGB_BLOCK (136)
+#define VK_FORMAT_BC3_UNORM_BLOCK (137)
+#define VK_FORMAT_BC3_SRGB_BLOCK (138)
+#define VK_FORMAT_BC4_UNORM_BLOCK (139)
+#define VK_FORMAT_BC4_SNORM_BLOCK (140)
+#define VK_FORMAT_BC5_UNORM_BLOCK (141)
+#define VK_FORMAT_BC5_SNORM_BLOCK (142)
+#define VK_FORMAT_BC6H_UFLOAT_BLOCK (143)
+#define VK_FORMAT_BC6H_SFLOAT_BLOCK (144)
+#define VK_FORMAT_BC7_UNORM_BLOCK (145)
+#define VK_FORMAT_BC7_SRGB_BLOCK (146)
+#define VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK (147)
+#define VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK (148)
+#define VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK (149)
+#define VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK (150)
+#define VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK (151)
+#define VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK (152)
+#define VK_FORMAT_EAC_R11_UNORM_BLOCK (153)
+#define VK_FORMAT_EAC_R11_SNORM_BLOCK (154)
+#define VK_FORMAT_EAC_R11G11_UNORM_BLOCK (155)
+#define VK_FORMAT_EAC_R11G11_SNORM_BLOCK (156)
+#define VK_FORMAT_ASTC_4x4_UNORM_BLOCK (157)
+#define VK_FORMAT_ASTC_4x4_SRGB_BLOCK (158)
+#define VK_FORMAT_ASTC_5x4_UNORM_BLOCK (159)
+#define VK_FORMAT_ASTC_5x4_SRGB_BLOCK (160)
+#define VK_FORMAT_ASTC_5x5_UNORM_BLOCK (161)
+#define VK_FORMAT_ASTC_5x5_SRGB_BLOCK (162)
+#define VK_FORMAT_ASTC_6x5_UNORM_BLOCK (163)
+#define VK_FORMAT_ASTC_6x5_SRGB_BLOCK (164)
+#define VK_FORMAT_ASTC_6x6_UNORM_BLOCK (165)
+#define VK_FORMAT_ASTC_6x6_SRGB_BLOCK (166)
+#define VK_FORMAT_ASTC_8x5_UNORM_BLOCK (167)
+#define VK_FORMAT_ASTC_8x5_SRGB_BLOCK (168)
+#define VK_FORMAT_ASTC_8x6_UNORM_BLOCK (169)
+#define VK_FORMAT_ASTC_8x6_SRGB_BLOCK (170)
+#define VK_FORMAT_ASTC_8x8_UNORM_BLOCK (171)
+#define VK_FORMAT_ASTC_8x8_SRGB_BLOCK (172)
+#define VK_FORMAT_ASTC_10x5_UNORM_BLOCK (173)
+#define VK_FORMAT_ASTC_10x5_SRGB_BLOCK (174)
+#define VK_FORMAT_ASTC_10x6_UNORM_BLOCK (175)
+#define VK_FORMAT_ASTC_10x6_SRGB_BLOCK (176)
+#define VK_FORMAT_ASTC_10x8_UNORM_BLOCK (177)
+#define VK_FORMAT_ASTC_10x8_SRGB_BLOCK (178)
+#define VK_FORMAT_ASTC_10x10_UNORM_BLOCK (179)
+#define VK_FORMAT_ASTC_10x10_SRGB_BLOCK (180)
+#define VK_FORMAT_ASTC_12x10_UNORM_BLOCK (181)
+#define VK_FORMAT_ASTC_12x10_SRGB_BLOCK (182)
+#define VK_FORMAT_ASTC_12x12_UNORM_BLOCK (183)
+#define VK_FORMAT_ASTC_12x12_SRGB_BLOCK (184)
+#define VK_FORMAT_A4R4G4B4_UNORM_PACK16 (1000340000)
+#define VK_FORMAT_A4B4G4R4_UNORM_PACK16 (1000340001)
diff --git a/src/dxvk/shaders/dxvk_image_to_buffer_ds.comp b/src/dxvk/shaders/dxvk_image_to_buffer_ds.comp
new file mode 100644
index 000000000..2a7a55671
--- /dev/null
+++ b/src/dxvk/shaders/dxvk_image_to_buffer_ds.comp
@@ -0,0 +1,65 @@
+#version 460
+// Packs depth and stencil texels of an image into the u_buffer texel buffer, encoded as dst_format.
+#extension GL_GOOGLE_include_directive : enable
+#extension GL_EXT_samplerless_texture_functions : enable
+
+#include "dxvk_formats.glsl"
+
+layout(local_size_x = 16, local_size_y = 16) in; // declared after the #extension directives, as GLSL requires
+
+layout(constant_id = 0) const uint dst_format = VK_FORMAT_UNDEFINED; // selects the encode path in main()
+
+layout(binding = 0) uniform writeonly uimageBuffer u_buffer;
+layout(binding = 1) uniform texture2DArray u_depth;
+layout(binding = 2) uniform utexture2DArray u_stencil;
+
+layout(push_constant)
+uniform push_data_t {
+  uvec3 image_offset;
+  uint buffer_offset;
+  uvec3 image_extent;
+  uint buffer_image_width;
+  uint buffer_image_height;
+  uint stencil_bit_index;
+};
+
+void main() {
+  uvec3 location = uvec3(gl_GlobalInvocationID);
+
+  if (any(greaterThanEqual(location, image_extent))) // invocation lies outside image_extent
+    return;
+
+  int offset = int(buffer_offset + location.x +
+    buffer_image_width * (location.y + buffer_image_height * location.z)); // linear texel index: x, then rows, then layers
+
+  uvec4 dst_value = uvec4(0u); // stored unchanged when dst_format matches no case below
+
+  float src_depth = texelFetch(u_depth, ivec3(location.xy + image_offset.xy, location.z), 0).x;
+  uint src_stencil = texelFetch(u_stencil, ivec3(location.xy + image_offset.xy, location.z), 0).x;
+
+  switch (dst_format) {
+    case VK_FORMAT_D16_UNORM:
+    case VK_FORMAT_D16_UNORM_S8_UINT:
+      dst_value.x = uint(roundEven(src_depth * float(0xffffu))); // scale to the 16-bit range
+      dst_value.y = src_stencil;
+      break;
+
+    case VK_FORMAT_D24_UNORM_S8_UINT:
+    case VK_FORMAT_X8_D24_UNORM_PACK32:
+      dst_value.x = uint(roundEven(src_depth * float(0xffffffu))); // scale to the 24-bit range
+      dst_value.x |= src_stencil << 24; // stencil goes into the top 8 bits of the same word
+      break;
+
+    case VK_FORMAT_D32_SFLOAT:
+    case VK_FORMAT_D32_SFLOAT_S8_UINT:
+      dst_value.x = floatBitsToUint(src_depth); // depth bits are stored as-is
+      dst_value.y = src_stencil;
+      break;
+
+    case VK_FORMAT_S8_UINT:
+      dst_value.x = src_stencil;
+      break;
+  }
+
+  imageStore(u_buffer, offset, dst_value);
+}
diff --git a/src/dxvk/shaders/dxvk_image_to_buffer_f.comp b/src/dxvk/shaders/dxvk_image_to_buffer_f.comp
new file mode 100644
index 000000000..ae6e6bff9
--- /dev/null
+++ b/src/dxvk/shaders/dxvk_image_to_buffer_f.comp
@@ -0,0 +1,55 @@
+#version 460
+// Packs the .x channel of a float image into the u_buffer texel buffer, encoded as dst_format.
+#extension GL_GOOGLE_include_directive : enable
+#extension GL_EXT_samplerless_texture_functions : enable
+
+#include "dxvk_formats.glsl"
+
+layout(local_size_x = 16, local_size_y = 16) in; // declared after the #extension directives, as GLSL requires
+
+layout(constant_id = 0) const uint dst_format = VK_FORMAT_UNDEFINED; // selects the encode path in main()
+
+layout(binding = 0) uniform writeonly uimageBuffer u_buffer;
+layout(binding = 1) uniform texture2DArray u_image;
+
+layout(push_constant)
+uniform push_data_t {
+  uvec3 image_offset;
+  uint buffer_offset;
+  uvec3 image_extent;
+  uint buffer_image_width;
+  uint buffer_image_height;
+  uint stencil_bit_index;
+};
+
+void main() {
+  uvec3 location = uvec3(gl_GlobalInvocationID);
+
+  if (any(greaterThanEqual(location, image_extent))) // invocation lies outside image_extent
+    return;
+
+  int offset = int(buffer_offset + location.x +
+    buffer_image_width * (location.y + buffer_image_height * location.z)); // linear texel index: x, then rows, then layers
+
+  uvec4 dst_color = uvec4(0u); // stored unchanged when dst_format matches no case below
+  vec4 src_color = texelFetch(u_image, ivec3(location.xy + image_offset.xy, location.z), 0);
+
+  switch (dst_format) {
+    case VK_FORMAT_D16_UNORM:
+    case VK_FORMAT_D16_UNORM_S8_UINT:
+      dst_color.x = uint(roundEven(src_color.x * float(0xffffu))); // scale to the 16-bit range
+      break;
+
+    case VK_FORMAT_D24_UNORM_S8_UINT:
+    case VK_FORMAT_X8_D24_UNORM_PACK32:
+      dst_color.x = uint(roundEven(src_color.x * float(0xffffffu))); // scale to the 24-bit range
+      break;
+
+    case VK_FORMAT_D32_SFLOAT:
+    case VK_FORMAT_D32_SFLOAT_S8_UINT:
+      dst_color.x = floatBitsToUint(src_color.x); // bits are stored as-is
+      break;
+  }
+
+  imageStore(u_buffer, offset, dst_color);
+}