From ef5227a2be58eda473cb39085b8b65512d8ae286 Mon Sep 17 00:00:00 2001 From: Unknown <0.tamas.marton@gmail.com> Date: Sun, 21 Apr 2019 13:41:25 +0100 Subject: [PATCH] using assembly as a fragment shader works --- QPUassembler/main.c | 13 +++++-- QPUassembler/qpu_assembler.c | 2 - QPUassembler/vc4_qpu_defines.h | 39 ++++++++----------- driver/command.c | 2 + driver/draw.c | 9 +++-- test/triangle/triangle.cpp | 70 ++++++++++++++++++++++------------ 6 files changed, 79 insertions(+), 56 deletions(-) diff --git a/QPUassembler/main.c b/QPUassembler/main.c index 95a5bf4..502016a 100644 --- a/QPUassembler/main.c +++ b/QPUassembler/main.c @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -7,8 +7,13 @@ int main() { char asm_code[] = - "sig_none ; rx0 = add(r2, r3) ; rx1 = fmul(r2, r3) ;\n" - "\0"; + "sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" + "sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" + "sig_small_imm ; tlb_color_all = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;" + "sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" + "sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" + "sig_unlock_score ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" + "\0"; printf("%s", asm_code); @@ -35,7 +40,7 @@ int main() return 0; } - uint64_t* instruction_size = sizeof(uint64_t)*num_instructions; + uint64_t instruction_size = sizeof(uint64_t)*num_instructions; uint64_t* instructions = malloc(instruction_size); assemble_qpu_asm(asm_code, instructions); diff --git a/QPUassembler/qpu_assembler.c b/QPUassembler/qpu_assembler.c index da3d610..47c12eb 100644 --- a/QPUassembler/qpu_assembler.c +++ b/QPUassembler/qpu_assembler.c @@ -718,7 +718,6 @@ void parse_args_alu(char** str, qpu_mux* in_a, qpu_mux* in_b, uint8_t* raddr_a, char* arg = strtok(*str, " \n\v\f\r\t,"); unsigned num_muxes = sizeof(qpu_mux_str) / sizeof(const char *); - unsigned found = 0; for(unsigned c = 0; c < num_muxes && arg; ++c) { @@ -726,7 +725,6 @@ void parse_args_alu(char** str, qpu_mux* in_a, qpu_mux* 
in_b, uint8_t* raddr_a, { *str = arg; *in_a = c; - found = 1; break; } } diff --git a/QPUassembler/vc4_qpu_defines.h b/QPUassembler/vc4_qpu_defines.h index 7b1d27e..cbd5360 100644 --- a/QPUassembler/vc4_qpu_defines.h +++ b/QPUassembler/vc4_qpu_defines.h @@ -79,12 +79,6 @@ typedef enum { QPU_MUX_R5, //special purpose QPU_MUX_A, QPU_MUX_B, - - /** - * Non-hardware mux value, stores a small immediate field to be - * programmed into raddr_b in the qpu_reg.index. - */ - QPU_MUX_SMALL_IMM, } qpu_mux; static const char *qpu_mux_str[] = { @@ -96,7 +90,6 @@ static const char *qpu_mux_str[] = { [QPU_MUX_R5] = "r5", [QPU_MUX_A] = "a", [QPU_MUX_B] = "b", - [QPU_MUX_SMALL_IMM] = "imm", }; @@ -154,37 +147,37 @@ uint8_t qpu_encode_small_immediate(uint32_t i) return i + 32; switch (i) { - case 0x3f800000: + case 0x3f800000: //1.0 return 32; - case 0x40000000: + case 0x40000000: //2.0 return 33; - case 0x40800000: + case 0x40800000: //4.0 return 34; - case 0x41000000: + case 0x41000000: //8.0 return 35; - case 0x41800000: + case 0x41800000: //16.0 return 36; - case 0x42000000: + case 0x42000000: //32.0 return 37; - case 0x42800000: + case 0x42800000: //64.0 return 38; - case 0x43000000: + case 0x43000000: //128.0 return 39; - case 0x3b800000: + case 0x3b800000: //1.0/256.0 return 40; - case 0x3c000000: + case 0x3c000000: //1.0/128.0 return 41; - case 0x3c800000: + case 0x3c800000: //1.0/64.0 return 42; - case 0x3d000000: + case 0x3d000000: //1.0/32.0 return 43; - case 0x3d800000: + case 0x3d800000: //1.0/16.0 return 44; - case 0x3e000000: + case 0x3e000000: //1.0/8.0 return 45; - case 0x3e800000: + case 0x3e800000: //1.0/4.0 return 46; - case 0x3f000000: + case 0x3f000000: //1.0/2.0 return 47; } diff --git a/driver/command.c b/driver/command.c index 1b313d0..5514ae0 100644 --- a/driver/command.c +++ b/driver/command.c @@ -312,6 +312,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( cmdbuf->submitCl.uniforms = cmdbuf->uniformsCl.buffer; cmdbuf->submitCl.uniforms_size = 
clSize(&cmdbuf->uniformsCl); + /**/ printf("BCL:\n"); clDump(cmdbuf->submitCl.bin_cl, cmdbuf->submitCl.bin_cl_size); printf("BO handles: "); @@ -331,6 +332,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( printf("clear z %u\n", cmdbuf->submitCl.clear_z); printf("clear s %u\n", cmdbuf->submitCl.clear_s); printf("flags %u\n", cmdbuf->submitCl.flags); + /**/ //submit ioctl diff --git a/driver/draw.c b/driver/draw.c index 8e250f6..a9ac570 100644 --- a/driver/draw.c +++ b/driver/draw.c @@ -227,7 +227,7 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins //FS uniform count : 1 tex sample count : 0 - uniform constant : 4291579008 + uniform constant : 4291579008 (color: #CC4C80, alpha FF) //VS uniform count : 4 @@ -245,9 +245,12 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins uniform viewport xscale : 15360.000000 uniform viewport zoffset : 0.500000 /**/ - clFit(commandBuffer, &commandBuffer->uniformsCl, 4*(1+4+4)); + + //TODO: if fragment shader doesn't use any uniforms, then VS will expect to read the first uniform in the stream + //clFit(commandBuffer, &commandBuffer->uniformsCl, 4*(1+4+4)); + clFit(commandBuffer, &commandBuffer->uniformsCl, 4*(4+4)); //FS - clInsertUniformConstant(&commandBuffer->uniformsCl, 4291579008); + //clInsertUniformConstant(&commandBuffer->uniformsCl, 4291579008); //VS clInsertUniformConstant(&commandBuffer->uniformsCl, 1065353216); clInsertUniformXYScale(&commandBuffer->uniformsCl, (float)(i->width) * 0.5f * 16.0f); diff --git a/test/triangle/triangle.cpp b/test/triangle/triangle.cpp index f306722..c69d901 100644 --- a/test/triangle/triangle.cpp +++ b/test/triangle/triangle.cpp @@ -35,33 +35,52 @@ const char* vertShader = const char fragBytes[] = { - 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, - 0x80, 0x7d, 0x82, 0x15, 0xa7, 0xb, 0x2, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30, - 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 
0x9e, 00, 0xe7, 0x9, 00, 0x50 + 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, + 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, + 0x80, 0x7d, 0x82, 0x15, 0xa7, 0xb, 0x2, 0x10, + 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30, + 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, + 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x50 }; const char vertBytes[] = { - 0x80, 0x1f, 0x82, 0x2, 0x27, 0x10, 0x2, 0xd0, 00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0, - 0x37, 00, 0xc2, 0x20, 0xe0, 0x49, 00, 0x10, 0x7, 00, 0x9c, 0x20, 0xe1, 0x49, 00, 0x10, - 0x77, 0x2, 0xc2, 0x27, 0x22, 0x40, 0x12, 0x10, 0x17, 00, 0x9c, 0x20, 0xe3, 0x49, 00, 0x10, - 0xc0, 0x76, 0x9e, 0x7, 0x27, 00, 0x22, 0x10, 00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0, - 0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10, 0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10, - 0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30, - 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, + 0x80, 0x1f, 0x82, 0x2, 0x27, 0x10, 0x2, 0xd0, + 00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0, + 0x37, 00, 0xc2, 0x20, 0xe0, 0x49, 00, 0x10, + 0x7, 00, 0x9c, 0x20, 0xe1, 0x49, 00, 0x10, + 0x77, 0x2, 0xc2, 0x27, 0x22, 0x40, 0x12, 0x10, + 0x17, 00, 0x9c, 0x20, 0xe3, 0x49, 00, 0x10, + 0xc0, 0x76, 0x9e, 0x7, 0x27, 00, 0x22, 0x10, + 00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0, + 0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10, + 0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10, + 0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0x10, + 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30, + 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, + 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, }; const char coordinateBytes[] = { - 00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0, 0x80, 0x7d, 0xc2, 0x15, 0xa7, 0x8, 0x2, 0x10, - 00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0, 0x92, 0x7d, 0xc2, 0x95, 0xf0, 0x48, 0x2, 0x10, - 0xde, 0x76, 0x82, 0x35, 0x21, 0x4c, 0x2, 0x10, 0x80, 0x1f, 0x82, 0x2, 0xe7, 0x8, 0x2, 0xd0, - 0x16, 0x70, 0x82, 0x20, 0xe2, 0x49, 00, 0x10, 0x13, 0x70, 0x9e, 0x20, 0xe0, 0x49, 00, 0x10, - 0xb, 
0x70, 0x9e, 0x27, 0x21, 0x40, 0x12, 0x10, 0x40, 0x72, 0x9e, 0x7, 0x27, 00, 0x22, 0x10, - 0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0xd0, 0xc0, 0xf, 0x9e, 0x15, 0x27, 0xc, 0x2, 0xd0, - 0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10, 0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10, - 0xc0, 0x76, 0x9e, 0x15, 0x27, 0xc, 0x2, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30, - 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, + 00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0, + 0x80, 0x7d, 0xc2, 0x15, 0xa7, 0x8, 0x2, 0x10, + 00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0, + 0x92, 0x7d, 0xc2, 0x95, 0xf0, 0x48, 0x2, 0x10, + 0xde, 0x76, 0x82, 0x35, 0x21, 0x4c, 0x2, 0x10, + 0x80, 0x1f, 0x82, 0x2, 0xe7, 0x8, 0x2, 0xd0, + 0x16, 0x70, 0x82, 0x20, 0xe2, 0x49, 00, 0x10, + 0x13, 0x70, 0x9e, 0x20, 0xe0, 0x49, 00, 0x10, + 0xb, 0x70, 0x9e, 0x27, 0x21, 0x40, 0x12, 0x10, + 0x40, 0x72, 0x9e, 0x7, 0x27, 00, 0x22, 0x10, + 0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0xd0, + 0xc0, 0xf, 0x9e, 0x15, 0x27, 0xc, 0x2, 0xd0, + 0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10, + 0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10, + 0xc0, 0x76, 0x9e, 0x15, 0x27, 0xc, 0x2, 0x10, + 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30, + 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, + 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, }; // Note: support swap chain recreation (not only required for resized windows!) 
@@ -848,10 +867,12 @@ VkShaderModule VulkanCreateShaderModule(VkDevice& device, char* byteStream, uint void CreateShaders() { + //clever: use small immediate -1 interpreted as 0xffffffff (white) to set color to white + //"sig_small_imm ; tlb_color_all = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;" + char asm_code[] = "sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" - "sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" - "sig_load_imm ; r0 = load32.always(0xffffffff) ; nop = load32() ;" + "sig_load_imm ; r0 = load32.always(0xffa14ccc) ; nop = load32() ;" "sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;" "sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" "sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" @@ -881,14 +902,15 @@ void CreateShaders() for(uint64_t c = 0; c < num_instructions; ++c) { + printf("%#llx ", instructions[c]); disassemble_qpu_asm(instructions[c]); } char* vptr = (char*)malloc(sizeof(vertBytes)); memcpy(vptr, vertBytes, sizeof(vertBytes)); - //char* fptr = (char*)malloc(sizeof(fragBytes)); - //memcpy(fptr, fragBytes, sizeof(fragBytes)); + char* fptr = (char*)malloc(sizeof(fragBytes)); + memcpy(fptr, fragBytes, sizeof(fragBytes)); char* cptr = (char*)malloc(sizeof(coordinateBytes)); memcpy(cptr, coordinateBytes, sizeof(coordinateBytes)); @@ -930,7 +952,7 @@ void CreateShaders() assert(fsModule); free(vptr); - //free(fptr); + free(fptr); free(cptr); }