mirror of
https://github.com/doitsujin/dxvk.git
synced 2024-12-03 22:24:13 +01:00
[d3d9] Optimize NV12 conversion to use a macropixel of [2, 1]
This commit is contained in:
parent
9fe1b9d03f
commit
e2a26f2bc5
@ -37,7 +37,7 @@ namespace dxvk {
|
|||||||
}
|
}
|
||||||
|
|
||||||
case D3D9ConversionFormat_NV12:
|
case D3D9ConversionFormat_NV12:
|
||||||
ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, VK_FORMAT_R8_UINT, 0, { 1u, 1u });
|
ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, VK_FORMAT_R16_UINT, 0, { 2u, 1u });
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case D3D9ConversionFormat_L6V5U5:
|
case D3D9ConversionFormat_L6V5U5:
|
||||||
|
@ -16,6 +16,11 @@ float unpackUnorm(uint p) {
|
|||||||
return float(p) / 255.0;
|
return float(p) / 255.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vec2 unpackUnorm2x8(uint p) {
|
||||||
|
uvec2 value = uvec2(p & 0xFF, p >> 8);
|
||||||
|
return vec2(unpackUnorm(value.x), unpackUnorm(value.y));
|
||||||
|
}
|
||||||
|
|
||||||
mat3x4 g_yuv_to_rgb = {
|
mat3x4 g_yuv_to_rgb = {
|
||||||
{ 298 / 256, 0, 409 / 256, 0.5 },
|
{ 298 / 256, 0, 409 / 256, 0.5 },
|
||||||
{ 298 / 256, -100 / 256, -208 / 256, 0.5 },
|
{ 298 / 256, -100 / 256, -208 / 256, 0.5 },
|
||||||
|
@ -18,39 +18,46 @@ uniform u_info_t {
|
|||||||
uvec2 extent;
|
uvec2 extent;
|
||||||
} u_info;
|
} u_info;
|
||||||
|
|
||||||
float fetchUnorm(usamplerBuffer source, uint offset) {
|
vec2 fetchUnorm2x8(usamplerBuffer source, uint offset) {
|
||||||
return unpackUnorm(texelFetch(src, int(offset)).r);
|
return unpackUnorm2x8(texelFetch(src, int(offset)).r);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Format is:
|
||||||
|
// YYYYYYYYYYYYYYY...
|
||||||
|
// YYYYYYYYYYYYYYY...
|
||||||
|
// UVUVUVUVUVUVUVU...
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
ivec3 thread_id = ivec3(gl_GlobalInvocationID);
|
ivec3 thread_id = ivec3(gl_GlobalInvocationID);
|
||||||
|
|
||||||
if (all(lessThan(thread_id.xy, u_info.extent))) {
|
if (all(lessThan(thread_id.xy, u_info.extent))) {
|
||||||
uvec2 pitch = uvec2(u_info.extent.x, u_info.extent.y);
|
uvec2 pitch = uvec2(u_info.extent.x, u_info.extent.y);
|
||||||
|
|
||||||
// Format is:
|
|
||||||
// YYYYYYYYYYYYYYY...
|
|
||||||
// UVUVUVUVUVUVUVU...
|
|
||||||
uint offset = thread_id.x
|
uint offset = thread_id.x
|
||||||
+ thread_id.y * pitch.x;
|
+ thread_id.y * pitch.x;
|
||||||
|
|
||||||
float c0 = fetchUnorm(src, offset) - (16 / 255.0);
|
// Fetch 2 luminance samples.
|
||||||
|
vec2 y = fetchUnorm2x8(src, offset) - (16 / 255.0);
|
||||||
|
|
||||||
// Floor .x to the nearest 2, because
|
// Go into the second plane to get the chroma data.
|
||||||
// UV data is in WORDs, and we want to get the color
|
// UV data is subsampled as [2, 2]
|
||||||
// for this pixel.
|
// So we need to divide thread_id.y by 2.
|
||||||
// Then divide thread_id.y by 2 because the macropixel
|
// thread_id.x needs no adjustment: each uint16 fetch already covers one UV pair.
|
||||||
// layout for chroma data is [2, 2].
|
offset = thread_id.x
|
||||||
offset = (thread_id.x / 2) * 2
|
|
||||||
+ thread_id.y / 2 * pitch.x
|
+ thread_id.y / 2 * pitch.x
|
||||||
+ pitch.x * pitch.y;
|
+ pitch.x * pitch.y;
|
||||||
|
|
||||||
float u = fetchUnorm(src, offset) - (128 / 255.0);
|
vec2 uv = fetchUnorm2x8(src, offset) - (128 / 255.0);
|
||||||
float v = fetchUnorm(src, offset + 1) - (128 / 255.0);
|
|
||||||
|
|
||||||
// The NV12 format seems to use the BT.601 color space (the helper is named convertBT_703).
|
// The NV12 format seems to use the BT.601 color space (the helper is named convertBT_703).
|
||||||
vec4 color0 = convertBT_703(vec3(c0, u, v));
|
vec4 color0 = convertBT_703(vec3(y.x, uv.x, uv.y));
|
||||||
|
vec4 color1 = convertBT_703(vec3(y.y, uv.x, uv.y));
|
||||||
|
|
||||||
|
// We write as a macropixel of [2, 1]
|
||||||
|
// So write out 2 pixels in this run.
|
||||||
|
ivec2 writePos = thread_id.xy * ivec2(2, 1);
|
||||||
|
|
||||||
imageStore(dst, thread_id.xy, color0);
|
imageStore(dst, ivec2(writePos.x, writePos.y), color0);
|
||||||
|
imageStore(dst, ivec2(writePos.x + 1, writePos.y), color1);
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user