1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-01-30 22:52:14 +01:00

added varying counting to frag shaders, fixed some stuff, needs work

This commit is contained in:
Unknown 2019-09-20 23:05:19 +01:00
parent 375b5324d7
commit 3da3187629
4 changed files with 61 additions and 5 deletions

View File

@ -220,6 +220,7 @@ typedef struct VkShaderModule_T
VkRpiAssemblyMappingEXT* mappings;
uint32_t numMappings;
uint32_t hasThreadSwitch;
uint32_t numVaryings;
} _shaderModule;
typedef struct VkDescriptorSetLayout_T

View File

@ -197,7 +197,7 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
0, //TODO point size included in shaded vertex data?
1, //enable clipping
0, //TODO fragment number of used uniforms?
0, //TODO fragment number of varyings?
cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_FRAGMENT_BIT)]->numVaryings, //fragment number of varyings
0, //fragment uniform address?
fragCode, //fragment code address
0, //TODO vertex number of used uniforms?

View File

@ -67,6 +67,31 @@ VkResult vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device, VkRpiShaderModu
}
}
//count how many times the shader reads a varying (read address 35 on either register file)
shader->numVaryings = 0;
for(uint64_t d = 0; d < numInstructions; ++d)
{
	//work on an unsigned 64 bit copy and shift right before masking:
	//building the masks as (0x7fll << 57) / (0xfll << 60) shifts a signed
	//constant past the sign bit, which is undefined behaviour in C
	uint64_t instruction = instructions[d];

	//top 7 bits identify a semaphore instruction, top 4 bits are the signal field
	unsigned is_sem = ((instruction >> 57) & 0x7f) == 0x74;
	unsigned sig_bits = (unsigned)((instruction >> 60) & 0xf);

	//if it's an ALU instruction
	//(signal 14 and 15 are skipped, the listings show 0xe... as load_imm)
	if(!is_sem && sig_bits != 14 && sig_bits != 15)
	{
		unsigned raddr_a = (unsigned)((instruction >> 18) & 0x3f);
		unsigned raddr_b = (unsigned)((instruction >> 12) & 0x3f);

		if(raddr_a == 35)
		{
			shader->numVaryings++;
		}

		//don't count small immediates
		//(with signal 13 the raddr_b field holds the immediate, not a register address)
		if(sig_bits != 13 && raddr_b == 35)
		{
			shader->numVaryings++;
		}
	}
}
printf("\n");
FREE(instructions);

View File

@ -845,7 +845,6 @@ void CreateShaders()
0x500009e7009e7000 sig_unlock_score nop nop, r0, r0 ; nop nop, r0, r0
VS prog 2/1 QPU:
0xd002102702821f80 sig_small_imm fsub rb0, 2.0, uni ; nop nop, r0, r0
0x00401a00:
0000 0000 0100 0000 0001 1010 0000 0000
///addr: 0
@ -854,6 +853,7 @@ VS prog 2/1 QPU:
///horizontal
///stride=1
///vectors to read = 4
0xd002102702821f80 sig_small_imm fsub rb0, 2.0, uni ; nop nop, r0, r0
0xe0024c6700401a00 load_imm vr_setup, nop, 0x00401a00 (0.000000)
0x100049e220c20037 nop nop, r0, r0 ; fmul r2, vpm_read, uni
0x100049e3209c0017 nop nop, r0, r0 ; fmul r3, r2, rb0
@ -868,11 +868,30 @@ VS prog 2/1 QPU:
0x10020c27159e7240 mov vpm, r1 ; nop nop, r0, r0
0x300009e7009e7000 sig_end nop nop, r0, r0 ; nop nop, r0, r0
0x100009e7009e7000 nop nop, r0, r0 ; nop nop, r0, r0
0x100009e7009e7000 nop nop, r0, r0 ; nop nop, r0, r0
CS prog 2/2 QPU:
0xe0024c6700201a00 load_imm vr_setup, nop, 0x00201a00 (0.000000)
0x100208a715c27d80 mov r2, vpm_read ; nop nop, r0, r0
0xe0025c6700001a00 load_imm vw_setup, nop, 0x00001a00 (0.000000)
0x100248f095c27d92 mov r3, vpm_read ; mov vpm, r2
0x10024c21358276de mov vpm, r3 ; fmul r1, r3, uni
0xd00208e702821f80 sig_small_imm fsub r3, 2.0, uni ; nop nop, r0, r0
0x100049e220827016 nop nop, r0, r0 ; fmul r2, r2, uni
0x100049e0209e7013 nop nop, r0, r0 ; fmul r0, r2, r3
0x10124021279e700b ftoi ra0.16a, r0, r0 ; fmul r1, r1, r3
0x10220027079e7240 ftoi ra0.16b, r1, r1 ; nop nop, r0, r0
0xd0020c27159c0fc0 sig_small_imm mov vpm, 0 ; nop nop, r0, r0
0xd0020c27159e0fc0 sig_small_imm mov vpm, 1.0 ; nop nop, r0, r0
0x10020c2715027d80 mov vpm, ra0 ; nop nop, r0, r0
0x10020c2715827d80 mov vpm, uni ; nop nop, r0, r0
0x10020c27159e76c0 mov vpm, r3 ; nop nop, r0, r0
0x300009e7009e7000 sig_end nop nop, r0, r0 ; nop nop, r0, r0
0x100009e7009e7000 nop nop, r0, r0 ; nop nop, r0, r0
0x100009e7009e7000 nop nop, r0, r0 ; nop nop, r0, r0
/**/
//TODO doesn't work for some reason...
char vs_asm_code[] =
///0x40000000 = 2.0
///uni = 1.0
@ -994,7 +1013,7 @@ VS prog 2/1 QPU:
"sig_none ; nop = nop(r0, r0) ; r2 = fmul.always(r2, r3);\n"
"sig_none ; nop = nop.pm(r0, r0) ; r0.8b = fmul.always(r1, r2) ;"
"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0) ; r0.8a = v8min.always(b, b) ;"
"sig_small_imm ; nop = nop.pm(r0, r0, nop, 1) ; r0.8d = v8min.always(b, b) ;"
"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8d = v8min.always(b, b) ;"
"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
@ -1031,7 +1050,18 @@ VS prog 2/1 QPU:
"sig_none ; r3 = fadd.pm.always(r0, r5) ; r0.8c = v8min.always(r2, r2) ;"
"sig_none ; nop = nop.pm(r0, r0) ; r0.8b = v8min.always(r3, r3) ;"
"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0) ; r0.8a = v8min.always(b, b) ;"
"sig_small_imm ; nop = nop.pm(r0, r0, nop, 1) ; r0.8d = v8min.always(b, b) ;"
"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8d = v8min.always(b, b) ;"
///"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8d = v8min.always(b, b) ;"
///"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8c = v8min.always(b, b) ;"
///"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8b = v8min.always(b, b) ;"
///"sig_small_imm ; nop = nop.pm(r0, r0, nop, 0x3f800000) ; r0.8a = v8min.always(b, b) ;"
///"sig_none ; nop = nop.pm(r0, r0, pay_zw, nop) ; r0.8d = v8min.always(a, a) ;"
///"sig_none ; nop = nop.pm(r0, r0, pay_zw, nop) ; r0.8c = v8min.always(a, a) ;"
///"sig_none ; nop = nop.pm(r0, r0, pay_zw, nop) ; r0.8b = v8min.always(a, a) ;"
///"sig_none ; nop = nop.pm(r0, r0, pay_zw, nop) ; r0.8a = v8min.always(a, a) ;"
"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"