1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-02-20 17:54:17 +01:00

added howto section for the qpu assembly

This commit is contained in:
Unknown 2019-04-20 13:55:40 +01:00
parent 9cb1f24cf2
commit 724f63f12a
2 changed files with 105 additions and 27 deletions

View File

@ -34,6 +34,109 @@
write, SFU write, Mutex read or Semaphore access.
*********************************************************************************************************************/
/*
Format:
#comment
sig_bit_opt ; dstAdd.pack_opt = add_op.pm_opt.sf_opt.cond.unpack_opt.ws_opt(srcA, srcB, raddr_a_opt, raddr_b_opt) ; dstMul.pack_opt = mul_op.cond(srcA, srcB) ;
sig_small_imm ; dstAdd.pack_opt = add_op.pm_opt.sf_opt.cond.unpack_opt.ws_opt(srcA, srcB, raddr_a_opt, small_imm) ; dstMul.pack_opt = mul_op.cond(srcA, srcB) ;
sig_branch ; dstAdd = branch.rel_opt.reg_opt.ws_opt(address, condition, raddr_a_opt) ; dstMul = branch() ;
sig_load_imm ; dstAdd.pack_opt = sem_inc.pm_opt.sf_opt.cond.ws_opt(sem_number, 27bit_imm_opt) ; dstMul.pack_opt = sem_inc.cond() ;
sig_load_imm ; dstAdd.pack_opt = load32.pm_opt.sf_opt.cond.ws_opt(immediate32bit_value) ; dstMul.pack_opt = load32.cond() ;
sig_load_imm ; dstAdd.pack_opt = load16.pm_opt.signed_opt.sf_opt.cond.ws_opt(int16_imm, int16_imm) ; dstMul.pack_opt = load16.cond() ;
==================================================================
================How to formulate instructions:====================
==================================================================
1)
You must specify the signal bits at the beginning of each instruction:
sig_brk, sig_none, sig_switch, sig_end, sig_wait_score, sig_unlock_score, sig_thread_switch, sig_coverage_load,
sig_color_load, sig_color_load_end, sig_load_tmu0, sig_load_tmu1, sig_alpha_mask_load, sig_small_imm, sig_load_imm, sig_branch
2)
Then you must specify the output register for the ADD pipeline.
rx0-31, r0-3, r5, tmu_noswap, host_int, nop, uniforms_addr, quad_x, quad_y, ms_flags, rev_flags, tlb_stencil_setup,
tlb_z, tlb_color_ms, tlb_color_all, vpm, vr_setup, vr_addr, mutex_release, sfu_recip, sfu_recipsqrt, sfu_exp,
sfu_log, tmu0_s, tmu0_t, tmu0_r, tmu0_b, tmu1_s, tmu1_t, tmu1_r, tmu1_b
3)
If the ADD instruction writes to regfile A (i.e. you don't specify the WS flag later) and the PM flag is not specified,
then you can specify the pack mode for regfile A here (omitting it means nop)
nop, 16a, 16b, 8888, 8a, 8b, 8c, 8d, sta, 16a.sat, 16b.sat, 8888.sat, 8a.sat, 8b.sat, 8c.sat, 8d.sat
4)
Then you must specify your operation for the ADD pipeline. If you are writing a non-ALU instruction, you can specify either
branch, sem_inc, sem_dec, load32 or load16 here instead.
Operations available:
nop, fadd, fsub, fmin, fmax, fminabs, fmaxabs, ftoi, itof, add, sub, shr, asr, ror, shl, min, max, and, or, xor, not, clz, v8adds, v8subs
5)
Then you can specify a range of modifiers (order is not important):
PM bit: pm
SF bit: sf
WS bit: ws
REL bit: rel
REG bit: reg
SIGNED bit: signed
Conditional execution for the ADD pipeline: never, always, zs, zc, ns, nc, cs, cc
Unpack modes (from regfile A, or if PM is set from R4): nop, 16a, 16b, 8d_rep, 8a, 8b, 8c, 8d
6)
Then you must specify the arguments for the ALU operation.
srcA, srcB can be: r0-r5 or a, b for regfiles A and B, or imm for the small immediate value.
raddr_a and raddr_b can be specified afterwards as optional extra arguments (omitting means nop).
raddr_a: ra0-31, pay_zw, uni, vary, elem, nop, x_pix, ms_flags, vpm_read, vpm_ld_busy, vpm_ld_wait, mutex_acq
raddr_b: rb0-31, pay_zw, uni, vary, elem, nop, y_pix, rev_flag, vpm_read, vpm_st_busy, vpm_st_wait, mutex_acq
For branch operation, you must specify:
the jump address as a 32bit value (can be relative if REL is set)
the branch condition: all_zs, all_zc, any_zs, any_zc, all_ns, all_nc, any_ns, any_nc, all_cs, all_cc, any_cs, any_cc, always
and an optional raddr_a (if REG flag is set), see above
For a semaphore instruction, you need to specify which semaphore (0-15) you want to modify, then an optional 27bit immediate value (the most significant 16 bits might be usable...).
7)
Then you must specify the output register for the MUL pipeline.
See above for options.
8)
If the MUL instruction writes to regfile A (i.e. you specify the WS flag), then you can set the pack operation for regfile A here:
nop, 16a, 16b, 8888, 8a, 8b, 8c, 8d, sta, 16a.sat, 16b.sat, 8888.sat, 8a.sat, 8b.sat, 8c.sat, 8d.sat
OR
If you specify the PM flag, then you can set the pack operation for the MUL output here:
nop, 8888, 8a, 8b, 8c, 8d
9)
Then you must specify your operation for the MUL pipeline. If you are writing a non-ALU instruction, you can specify either
branch, sem_inc, sem_dec, load32 or load16 here instead.
Operations available:
nop, fmul, mul24, v8muld, v8min, v8max, v8adds, v8subs
10)
Then you can specify a range of modifiers (order is not important):
Conditional execution for the MUL pipeline: never, always, zs, zc, ns, nc, cs, cc
11)
Then you must specify the arguments for the ALU operation.
srcA, srcB can be: r0-r5 or a, b for regfiles A and B, or imm for the small immediate value.
==================================================================
==================================================================
dstAdd: rx0-31, r0-5, special regs
raddr_a_opt: ra0-31, r0-5, special regs
raddr_b_opt: rb0-31, r0-5, special regs
Examples:
sig_none ; rx0.nop = add.pm.sf.always(r0, r1, 0) ; rx0.nop = fmul.always(r2, r3) ;
sig_branch ; rx0 = branch.pm.rel.reg.always(0xdeadbeef, ra1) ; rx0 = branch() ;
sig_none ; rx0.nop = sem_inc.pm.sf.always(1, 0x7ffffff) ; rx0.nop = sem_inc.always() ;
sig_load_imm ; rx0.nop = load32.pm.sf.always(0xdeadbeef) ; rx0.nop = load32.always() ;
sig_load_imm ; rx0.nop = load16.pm.sf.signed.always(1, 2) ; rx0.nop = load16.always() ;
#mov
sig_none ; rx0.nop = or(r0, r0) ; rx0 = v8min(r1, r1) ;
*/
uint64_t encode_alu(qpu_sig_bits sig_bits,
qpu_unpack unpack_mode,
//If the pm bit is set, the unpack field programs the r4 unpack unit,
@ -1338,33 +1441,6 @@ void disassemble_qpu_asm(uint64_t instruction)
printf("\n");
}
/*
Format:
#comment
sig_bit_opt ; dstAdd.pack_opt = add_op.pm_opt.sf_opt.cond.unpack_opt.ws_opt(srcA, srcB, raddr_a_opt, raddr_b_opt) ; dstMul.pack_opt = mul_op.cond(srcA, srcB) ;
sig_small_imm ; dstAdd.pack_opt = add_op.pm_opt.sf_opt.cond.unpack_opt.ws_opt(srcA, srcB, raddr_a_opt, small_imm) ; dstMul.pack_opt = mul_op.cond(srcA, srcB) ;
sig_branch ; dstAdd = branch.rel_opt.reg_opt.ws_opt(address, condition, srcA_opt) ; dstMul = branch() ;
sig_load_imm ; dstAdd.pack_opt = sem_inc.pm_opt.sf_opt.cond.ws_opt(sem_number, 27bit_imm_opt) ; dstMul.pack_opt = sem_inc.cond() ;
sig_load_imm ; dstAdd.pack_opt = load32.pm_opt.sf_opt.cond.ws_opt(immediate_value) ; dstMul.pack_opt = load32.cond() ;
sig_load_imm ; dstAdd.pack_opt = load16.pm_opt.signed_opt.sf_opt.cond.ws_opt(int16_imm, int16_imm) ; dstMul.pack_opt = load16.cond() ;
You can specify the signal bits at the beginning of each instruction.
dstAdd: rx0-31, r0-5, special regs
raddr_a_opt: ra0-31, r0-5, special regs
raddr_b_opt: rb0-31, r0-5, special regs
Examples:
sig_none ; rx0.nop = add.pm.sf.always(r0, r1, 0) ; rx0.nop = fmul.always(r2, r3) ;
sig_branch ; rx0 = branch.pm.rel.reg.always(0xdeadbeef, ra1) ; rx0 = branch() ;
sig_none ; rx0.nop = sem_inc.pm.sf.always(1, 0x7ffffff) ; rx0.nop = sem_inc.always() ;
sig_load_imm ; rx0.nop = load32.pm.sf.always(0xdeadbeef) ; rx0.nop = load32.always() ;
sig_load_imm ; rx0.nop = load16.pm.sf.signed.always(1, 2) ; rx0.nop = load16.always() ;
#mov
sig_none ; rx0.nop = or(r0, r0) ; rx0 = v8min(r1, r1) ;
*/
int main()
{
char asm_code[] =

View File

@ -411,6 +411,7 @@ typedef enum {
static const char *qpu_raddr_str[][52] = {
{ //A
//ra0-31
[QPU_R_FRAG_PAYLOAD_ZW] = "pay_zw",
[QPU_R_UNIF] = "uni",
[QPU_R_VARY] = "vary",
@ -424,6 +425,7 @@ static const char *qpu_raddr_str[][52] = {
[QPU_R_MUTEX_ACQUIRE] = "mutex_acq"
},
{ //B
//rb0-31
[QPU_R_FRAG_PAYLOAD_ZW] = "pay_zw",
[QPU_R_UNIF] = "uni",
[QPU_R_VARY] = "vary",