1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-02-17 14:54:20 +01:00

added vertex attrib encoding, removed shader patching

This commit is contained in:
Unknown 2019-12-08 15:31:42 +00:00
parent f9ac2afe0c
commit 06382178ff
5 changed files with 71 additions and 122 deletions

View File

@ -148,8 +148,8 @@ uint64_t encode_alu(qpu_sig_bits sig_bits,
uint8_t write_swap_flag, //0: add writes to A, mul to B, 1: add writes to B, mul to A
qpu_waddr waddr_add,
qpu_waddr waddr_mul,
qpu_op_mul op_mul,
qpu_op_add op_add,
qpu_op_mul op_mul,
qpu_raddr raddr_a,
qpu_raddr raddr_b,
qpu_mux add_a,
@ -227,8 +227,8 @@ uint64_t encode_alu_small_imm(qpu_unpack unpack_mode,
uint8_t write_swap_flag, //0: add writes to A, mul to B, 1: add writes to B, mul to A
qpu_waddr waddr_add,
qpu_waddr waddr_mul,
qpu_op_mul op_mul,
qpu_op_add op_add,
qpu_op_mul op_mul,
qpu_raddr raddr_a,
uint8_t small_imm,
qpu_mux add_a,
@ -247,8 +247,8 @@ uint64_t encode_alu_small_imm(qpu_unpack unpack_mode,
write_swap_flag,
waddr_add,
waddr_mul,
op_mul,
op_add,
op_mul,
raddr_a,
small_imm,
add_a,
@ -1067,11 +1067,11 @@ void assemble_qpu_asm(char* str, uint64_t* instructions)
{
if(sig_bit == QPU_SIG_SMALL_IMM)
{
instructions[instruction_counter] = encode_alu_small_imm(unpack_mode, pack_unpack_select, pack_mode, cond_add, cond_mul, sf, ws, waddr_add, waddr_mul, op_mul, op_add, raddr_a, raddr_b, add_a, add_b, mul_a, mul_b);
instructions[instruction_counter] = encode_alu_small_imm(unpack_mode, pack_unpack_select, pack_mode, cond_add, cond_mul, sf, ws, waddr_add, waddr_mul, op_add, op_mul, raddr_a, raddr_b, add_a, add_b, mul_a, mul_b);
}
else
{
instructions[instruction_counter] = encode_alu(sig_bit, unpack_mode, pack_unpack_select, pack_mode, cond_add, cond_mul, sf, ws, waddr_add, waddr_mul, op_mul, op_add, raddr_a, raddr_b, add_a, add_b, mul_a, mul_b);
instructions[instruction_counter] = encode_alu(sig_bit, unpack_mode, pack_unpack_select, pack_mode, cond_add, cond_mul, sf, ws, waddr_add, waddr_mul, op_add, op_mul, raddr_a, raddr_b, add_a, add_b, mul_a, mul_b);
}
}
else if(type == QPU_SEM)

View File

@ -20,8 +20,8 @@ uint64_t encode_alu(qpu_sig_bits sig_bits,
uint8_t write_swap_flag, //0: add writes to A, mul to B, 1: add writes to B, mul to A
qpu_waddr waddr_add,
qpu_waddr waddr_mul,
qpu_op_mul op_mul,
qpu_op_add op_add,
qpu_op_mul op_mul,
qpu_raddr raddr_a,
qpu_raddr raddr_b,
qpu_mux add_a,
@ -38,8 +38,8 @@ uint64_t encode_alu_small_imm(qpu_unpack unpack_mode,
uint8_t write_swap_flag, //0: add writes to A, mul to B, 1: add writes to B, mul to A
qpu_waddr waddr_add,
qpu_waddr waddr_mul,
qpu_op_mul op_mul,
qpu_op_add op_add,
qpu_op_mul op_mul,
qpu_raddr raddr_a,
uint8_t small_imm,
qpu_mux add_a,

View File

@ -782,6 +782,25 @@ void encodeStencilValue(uint32_t *values, uint32_t* numValues, VkStencilOpState
}
}
/**
 * Packs the individual VPM setup fields into a single 32-bit setup word.
 *
 * Each argument is truncated to the width of its field before being placed:
 *   bits 23:20  vectorComponentsToRead (4 bits)
 *   bits 17:12  stride                 (6 bits)
 *   bit  11     direction              (1 bit)
 *   bit  10     isLaned                (1 bit)
 *   bits  9:8   size                   (2 bits)
 *   bits  7:0   address                (8 bits)
 * All remaining bits of the result are zero.
 *
 * @return the assembled setup word
 */
uint32_t encodeVPMSetup(uint8_t stride,
uint8_t direction, //0 vertical, 1 horizontal
uint8_t isLaned, //0 packed, 1 laned
uint8_t size, //0 8bit, 1 16bit, 2 32bit
uint8_t address, //see doc
uint8_t vectorComponentsToRead //only used for VPM read setup
)
{
	//mask every input down to its field width first, then shift it into place
	const uint32_t numField    = ((uint32_t)vectorComponentsToRead & 0xf) << 20;
	const uint32_t strideField = ((uint32_t)stride & 0x3f) << 12;
	const uint32_t horizField  = ((uint32_t)direction & 0x1) << 11;
	const uint32_t lanedField  = ((uint32_t)isLaned & 0x1) << 10;
	const uint32_t sizeField   = ((uint32_t)size & 0x3) << 8;
	const uint32_t addrField   = (uint32_t)address & 0xff;

	//the fields do not overlap, so OR-ing them together yields the final word
	return numField | strideField | horizField | lanedField | sizeField | addrField;
}
uint8_t getTextureDataType(VkFormat format)
{
switch(format)

View File

@ -494,6 +494,12 @@ void encodeTextureUniform(uint32_t* params,
uint8_t wrapS,
uint8_t noAutoLod);
void encodeStencilValue(uint32_t* values, uint32_t* numValues, VkStencilOpState front, VkStencilOpState back, uint8_t stencilTestEnable);
//Packs the given VPM setup fields into one 32-bit setup word.
//Each argument is masked to its field width by the implementation.
uint32_t encodeVPMSetup(uint8_t stride, //6-bit field
uint8_t direction, //0 vertical, 1 horizontal
uint8_t isLaned, //0 packed, 1 laned
uint8_t size, //0 8bit, 1 16bit, 2 32bit
uint8_t address, //8-bit field, see doc
uint8_t vectorComponentsToRead); //4-bit field, only used for VPM read setup
uint8_t getTextureDataType(VkFormat format);
uint8_t getMinFilterType(VkFilter minFilter, VkSamplerMipmapMode mipFilter, float maxLod);
uint8_t getWrapMode(VkSamplerAddressMode mode);

View File

@ -47,13 +47,13 @@ void patchShaderDepthStencilBlending(uint64_t** instructions, uint32_t* size, co
for(uint32_t c = 0; c < numValues; ++c)
{
tmp[c] = encode_load_imm(0, 0, 1, 0, 0, 0, 32 + c, 39, values[c]); //r0 = load32.always(values[c])
tmp[numValues + c] = encode_alu(1, 0, 0, 0, 1, 0, 0, 0, 43, 39, 0, 21, 0, 0, c, c, 0, 0); //tlb_stencil_setup = or.always(r0, r0)
tmp[numValues + c] = encode_alu(1, 0, 0, 0, 1, 0, 0, 0, 43, 39, 21, 0, 0, 0, c, c, 0, 0); //tlb_stencil_setup = or.always(r0, r0)
}
///"sig_none ; tlb_z = or.always(b, b, nop, rb15) ; nop = nop(r0, r0) ;"
if(dsi->depthWriteEnable || dsi->stencilTestEnable)
{
tmp[numValues*2] = encode_alu(1, 0, 0, 0, 1, 0, 0, 0, 44, 39, 0, 21, 0, 15, 7, 7, 0, 0);
tmp[numValues*2] = encode_alu(1, 0, 0, 0, 1, 0, 0, 0, 44, 39, 21, 0, 0, 15, 7, 7, 0, 0);
}
@ -63,14 +63,18 @@ void patchShaderDepthStencilBlending(uint64_t** instructions, uint32_t* size, co
if(bas->blendEnable)
{
/// find last instruction that wrote to tlb_color_all
/// patch shader so that r0 will contain whatever would be written to tlb_color_all
/// r0 contains sRGBA
//"sig_none ; r0 = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;"
uint64_t instruction;
/// load dRGBA to r1
/// load tlb color dRGBA to r4
//"sig_color_load ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
//"sig_none ; nop = nop(r0, r0) ; r1 = v8min.always(r4, r4) ;"
assemble_qpu_asm("sig_color_load ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;", &instruction);
assemble_qpu_asm("sig_none ; r1 = or.always(r4, r4) ; nop = nop(r0, r0) ;", &instruction);
//if factors are not separate
if(bas->srcAlphaBlendFactor == bas->srcColorBlendFactor &&
@ -79,132 +83,60 @@ void patchShaderDepthStencilBlending(uint64_t** instructions, uint32_t* size, co
switch(bas->srcAlphaBlendFactor)
{
case VK_BLEND_FACTOR_ZERO:
/// if Sfactor is ZERO
//"sig_small_imm ; r2 = or.always(b, b, nop, 0) ; nop = nop(r0, r0) ;"
assemble_qpu_asm("sig_small_imm ; r2 = or.always(b, b, nop, 0) ; nop = nop(r0, r0) ;", &instruction);
break;
case VK_BLEND_FACTOR_ONE:
/// if Sfactor is ONE
//"sig_small_imm ; r2 = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;"
assemble_qpu_asm("sig_small_imm ; r2 = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;", &instruction);
break;
case VK_BLEND_FACTOR_SRC_COLOR:
/// if Sfactor is sCOLOR
//"sig_none ; r2 = or.always(r0, r0) ; nop = nop(r0, r0) ;"
assemble_qpu_asm("sig_none ; r2 = or.always(r0, r0) ; nop = nop(r0, r0) ;", &instruction);
break;
case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
/// if Sfactor is 1-sCOLOR
//"sig_none ; r2 = not.always(r0, r0) ; nop = nop(r0, r0) ;"
assemble_qpu_asm("sig_none ; r2 = not.always(r0, r0) ; nop = nop(r0, r0) ;", &instruction);
break;
case VK_BLEND_FACTOR_DST_COLOR:
/// if Sfactor is dCOLOR
//"sig_none ; r2 = or.always(r1, r1) ; nop = nop(r0, r0) ;"
assemble_qpu_asm("sig_none ; r2 = or.always(r1, r1) ; nop = nop(r0, r0) ;", &instruction);
break;
case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
/// if Sfactor is 1-dCOLOR
//"sig_none ; r2 = not.always(r1, r1) ; nop = nop(r0, r0) ;"
assemble_qpu_asm("sig_none ; r2 = not.always(r1, r1) ; nop = nop(r0, r0) ;", &instruction);
break;
case VK_BLEND_FACTOR_SRC_ALPHA:
/// if Sfactor is sALPHA
//"sig_none ; r2.8888 = or.always.8d(r0, r0) ; nop = nop(r0, r0) ;"
assemble_qpu_asm("sig_none ; r2.8888 = or.always.8d(r0, r0) ; nop = nop(r0, r0) ;", &instruction);
break;
case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
/// if Sfactor is 1-sALPHA
//"sig_none ; r2.8888 = or.always.8d(r0, r0) ; nop = nop(r0, r0) ;"
//"sig_none ; r2 = not.always(r2, r2) ; nop = nop(r0, r0) ;"
assemble_qpu_asm("sig_none ; r2.8888 = or.always.8d(r0, r0) ; nop = nop(r0, r0) ;", &instruction);
assemble_qpu_asm("sig_none ; r2 = not.always(r2, r2) ; nop = nop(r0, r0) ;", &instruction);
case VK_BLEND_FACTOR_DST_ALPHA:
/// if Sfactor is dALPHA
//"sig_none ; r2.8888 = or.always.8d(r1, r1) ; nop = nop(r0, r0) ;"
assemble_qpu_asm("sig_none ; r2.8888 = or.always.8d(r1, r1) ; nop = nop(r0, r0) ;", &instruction);
break;
case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
/// if Sfactor is 1-dALPHA
//"sig_none ; r2.8888 = or.always.8d(r1, r1) ; nop = nop(r0, r0) ;"
//"sig_none ; r2 = not.always(r2, r2) ; nop = nop(r0, r0) ;"
assemble_qpu_asm("sig_none ; r2.8888 = or.always.8d(r1, r1) ; nop = nop(r0, r0) ;", &instruction);
assemble_qpu_asm("sig_none ; r2 = not.always(r2, r2) ; nop = nop(r0, r0) ;", &instruction);
break;
case VK_BLEND_FACTOR_CONSTANT_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
case VK_BLEND_FACTOR_CONSTANT_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
/// if Sfactor is cCOLOR, 1-cCOLOR, cALPHA, 1-cALPHA = 0xffffffff
//"sig_load_imm ; r2 = load32.always(0xffffffff) ; nop = load32() ;"
assemble_qpu_asm("sig_load_imm ; r2 = load32.always(0xffffffff) ; nop = load32() ;", &instruction);
break;
case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
/// if Sfactor is sALPHASat
//"sig_none ; r2.8888 = or.always.8d(r0, r0) ; nop = nop(r0, r0) ;" //sAAAA
//"sig_none ; r3.8888 = or.always.8d(r1, r1) ; nop = nop(r0, r0) ;" //dAAAA
//"sig_none ; r3 = not.always(r3, r3) ; nop = nop(r0, r0) ;" //1-dAAAA
//"sig_none ; nop = nop(r0, r0) ; r2 = v8min.always(r2, r3) ;" //min(sAAAA, 1-dAAAA)
//"sig_load_imm ; r3 = load32.always(0xff000000) ; nop = load32() ;" //load alpha = 1
//"sig_small_imm ; r2 = or.always(r2, r3) ; nop = nop(r0, r0) ;" //set alpha to 1
assemble_qpu_asm("sig_none ; r2.8888 = or.always.8d(r0, r0) ; nop = nop(r0, r0) ;", &instruction); //sAAAA
assemble_qpu_asm("sig_none ; r3.8888 = or.always.8d(r1, r1) ; nop = nop(r0, r0) ;", &instruction); //dAAAA
assemble_qpu_asm("sig_none ; r3 = not.always(r3, r3) ; nop = nop(r0, r0) ;", &instruction); //1-dAAAA
assemble_qpu_asm("sig_none ; nop = nop(r0, r0) ; r2 = v8min.always(r2, r3) ;", &instruction); //min(sAAAA, 1-dAAAA)
assemble_qpu_asm("sig_load_imm ; r3 = load32.always(0xff000000) ; nop = load32() ;", &instruction); //load alpha = 1
assemble_qpu_asm("sig_small_imm ; r2 = or.always(r2, r3) ; nop = nop(r0, r0) ;", &instruction); //set alpha to 1
break;
}
/// Multiply sRGBA and source factor
//"sig_none ; nop = nop(r0, r0) ; r0 = v8muld.always(r0, r2) ;"
assemble_qpu_asm("sig_none ; nop = nop(r0, r0) ; r0 = v8muld.always(r0, r2) ;", &instruction);
switch(bas->dstAlphaBlendFactor)
{
case VK_BLEND_FACTOR_ZERO:
/// if Dfactor is ZERO
//"sig_small_imm ; r2 = or.always(b, b, nop, 0) ; nop = nop(r0, r0) ;"
break;
case VK_BLEND_FACTOR_ONE:
/// if Dfactor is ONE
//"sig_small_imm ; r2 = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;"
break;
case VK_BLEND_FACTOR_SRC_COLOR:
/// if Dfactor is sCOLOR
//"sig_none ; r2 = or.always(r0, r0) ; nop = nop(r0, r0) ;"
break;
case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
/// if Dfactor is 1-sCOLOR
//"sig_none ; r2 = not.always(r0, r0) ; nop = nop(r0, r0) ;"
break;
case VK_BLEND_FACTOR_DST_COLOR:
/// if Dfactor is dCOLOR
//"sig_none ; r2 = or.always(r1, r1) ; nop = nop(r0, r0) ;"
break;
case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
/// if Dfactor is 1-dCOLOR
//"sig_none ; r2 = not.always(r1, r1) ; nop = nop(r0, r0) ;"
break;
case VK_BLEND_FACTOR_SRC_ALPHA:
/// if Dfactor is sALPHA
//"sig_none ; r2.8888 = or.always.8d(r0, r0) ; nop = nop(r0, r0) ;"
break;
case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
/// if Dfactor is 1-sALPHA
//"sig_none ; r2.8888 = or.always.8d(r0, r0) ; nop = nop(r0, r0) ;"
//"sig_none ; r2 = not.always(r2, r2) ; nop = nop(r0, r0) ;"
case VK_BLEND_FACTOR_DST_ALPHA:
/// if Dfactor is dALPHA
//"sig_none ; r2.8888 = or.always.8d(r1, r1) ; nop = nop(r0, r0) ;"
break;
case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
/// if Dfactor is 1-dALPHA
//"sig_none ; r2.8888 = or.always.8d(r1, r1) ; nop = nop(r0, r0) ;"
//"sig_none ; r2 = not.always(r2, r2) ; nop = nop(r0, r0) ;"
break;
case VK_BLEND_FACTOR_CONSTANT_COLOR:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
case VK_BLEND_FACTOR_CONSTANT_ALPHA:
case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
/// if Dfactor is cCOLOR, 1-cCOLOR, cALPHA, 1-cALPHA = 0xffffffff
//"sig_load_imm ; r2 = load32.always(0xffffffff) ; nop = load32() ;"
break;
case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
/// if Dfactor is sALPHASat
//"sig_none ; r2.8888 = or.always.8d(r0, r0) ; nop = nop(r0, r0) ;" //sAAAA
//"sig_none ; r3.8888 = or.always.8d(r1, r1) ; nop = nop(r0, r0) ;" //dAAAA
//"sig_none ; r3 = not.always(r3, r3) ; nop = nop(r0, r0) ;" //1-dAAAA
//"sig_none ; nop = nop(r0, r0) ; r2 = v8min.always(r2, r3) ;" //min(sAAAA, 1-dAAAA)
//"sig_load_imm ; r3 = load32.always(0xff000000) ; nop = load32() ;" //load alpha = 1
//"sig_small_imm ; r2 = or.always(r2, r3) ; nop = nop(r0, r0) ;" //set alpha to 1
break;
}
///repeat for
//bas->dstAlphaBlendFactor
/// Multiply dRGBA and destination factor
//"sig_none ; nop = nop(r0, r0) ; r1 = v8muld.always(r1, r2) ;"
assemble_qpu_asm("sig_none ; nop = nop(r0, r0) ; r1 = v8muld.always(r1, r2) ;", &instruction);
}
else //separate factors
{
@ -214,31 +146,21 @@ void patchShaderDepthStencilBlending(uint64_t** instructions, uint32_t* size, co
switch(bas->alphaBlendOp)
{
case VK_BLEND_OP_ADD:
/// If Equation is ADD:
//"sig_none ; nop = nop(r0, r0) ; tlb_color_all = v8adds.always(r0, r1) ;"
assemble_qpu_asm("sig_none ; nop = nop(r0, r0) ; tlb_color_all = v8adds.always(r0, r1) ;", &instruction);
break;
case VK_BLEND_OP_SUBTRACT:
/// If Equation is SUB:
//"sig_none ; nop = nop(r0, r0) ; tlb_color_all = v8subs.always(r0, r1) ;"
assemble_qpu_asm("sig_none ; nop = nop(r0, r0) ; tlb_color_all = v8subs.always(r0, r1) ;", &instruction);
break;
case VK_BLEND_OP_REVERSE_SUBTRACT:
/// If Equation is rSUB:
//"sig_none ; nop = nop(r0, r0) ; tlb_color_all = v8subs.always(r1, r0) ;"
assemble_qpu_asm("sig_none ; nop = nop(r0, r0) ; tlb_color_all = v8subs.always(r1, r0) ;", &instruction);
break;
case VK_BLEND_OP_MIN:
/// If Equation is MIN:
//"sig_none ; nop = nop(r0, r0) ; tlb_color_all = v8min.always(r0, r1) ;"
assemble_qpu_asm("sig_none ; nop = nop(r0, r0) ; tlb_color_all = v8min.always(r0, r1) ;", &instruction);
break;
case VK_BLEND_OP_MAX:
/// If Equation is MAX:
//"sig_none ; nop = nop(r0, r0) ; tlb_color_all = v8max.always(r0, r1) ;"
assemble_qpu_asm("sig_none ; nop = nop(r0, r0) ; tlb_color_all = v8max.always(r0, r1) ;", &instruction);
break;
}
/// retain nops etc.
//"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
//"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
//"sig_unlock_score ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
}
//replace instructions pointer
@ -294,7 +216,9 @@ VkResult rpi_vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipeline
//patch fragment shader
if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_FRAGMENT_BIT)
{
patchShaderDepthStencilBlending(&s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT], pCreateInfos[c].pDepthStencilState, pCreateInfos[c].pColorBlendState->pAttachments, pAllocator);
//TODO we could patch the fragment shader, but it would have a lot of edge cases
//since the user is writing assembly we can just let them have full control
//patchShaderDepthStencilBlending(&s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT], pCreateInfos[c].pDepthStencilState, pCreateInfos[c].pColorBlendState->pAttachments, pAllocator);
//TODO if debug...
for(uint64_t e = 0; e < s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT] / 8; ++e)