1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-01-18 10:52:14 +01:00

using assembly as a fragment shader works

This commit is contained in:
Unknown 2019-04-21 13:41:25 +01:00
parent 9283288a7a
commit ef5227a2be
6 changed files with 79 additions and 56 deletions

View File

@ -1,4 +1,4 @@
#include <stdio.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@ -7,8 +7,13 @@
int main()
{
char asm_code[] =
"sig_none ; rx0 = add(r2, r3) ; rx1 = fmul(r2, r3) ;\n"
"\0";
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_small_imm ; tlb_color_all = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_unlock_score ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"\0";
printf("%s", asm_code);
@ -35,7 +40,7 @@ int main()
return 0;
}
uint64_t* instruction_size = sizeof(uint64_t)*num_instructions;
uint64_t instruction_size = sizeof(uint64_t)*num_instructions;
uint64_t* instructions = malloc(instruction_size);
assemble_qpu_asm(asm_code, instructions);

View File

@ -718,7 +718,6 @@ void parse_args_alu(char** str, qpu_mux* in_a, qpu_mux* in_b, uint8_t* raddr_a,
char* arg = strtok(*str, " \n\v\f\r\t,");
unsigned num_muxes = sizeof(qpu_mux_str) / sizeof(const char *);
unsigned found = 0;
for(unsigned c = 0; c < num_muxes && arg; ++c)
{
@ -726,7 +725,6 @@ void parse_args_alu(char** str, qpu_mux* in_a, qpu_mux* in_b, uint8_t* raddr_a,
{
*str = arg;
*in_a = c;
found = 1;
break;
}
}

View File

@ -79,12 +79,6 @@ typedef enum {
QPU_MUX_R5, //special purpose
QPU_MUX_A,
QPU_MUX_B,
/**
* Non-hardware mux value, stores a small immediate field to be
* programmed into raddr_b in the qpu_reg.index.
*/
QPU_MUX_SMALL_IMM,
} qpu_mux;
static const char *qpu_mux_str[] = {
@ -96,7 +90,6 @@ static const char *qpu_mux_str[] = {
[QPU_MUX_R5] = "r5",
[QPU_MUX_A] = "a",
[QPU_MUX_B] = "b",
[QPU_MUX_SMALL_IMM] = "imm",
};
@ -154,37 +147,37 @@ uint8_t qpu_encode_small_immediate(uint32_t i)
return i + 32;
switch (i) {
case 0x3f800000:
case 0x3f800000: //1.0
return 32;
case 0x40000000:
case 0x40000000: //2.0
return 33;
case 0x40800000:
case 0x40800000: //4.0
return 34;
case 0x41000000:
case 0x41000000: //8.0
return 35;
case 0x41800000:
case 0x41800000: //16.0
return 36;
case 0x42000000:
case 0x42000000: //32.0
return 37;
case 0x42800000:
case 0x42800000: //64.0
return 38;
case 0x43000000:
case 0x43000000: //128.0
return 39;
case 0x3b800000:
case 0x3b800000: //1.0/256.0
return 40;
case 0x3c000000:
case 0x3c000000: //1.0/128.0
return 41;
case 0x3c800000:
case 0x3c800000: //1.0/64.0
return 42;
case 0x3d000000:
case 0x3d000000: //1.0/32.0
return 43;
case 0x3d800000:
case 0x3d800000: //1.0/16.0
return 44;
case 0x3e000000:
case 0x3e000000: //1.0/8.0
return 45;
case 0x3e800000:
case 0x3e800000: //1.0/4.0
return 46;
case 0x3f000000:
case 0x3f000000: //1.0/2.0
return 47;
}

View File

@ -312,6 +312,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
cmdbuf->submitCl.uniforms = cmdbuf->uniformsCl.buffer;
cmdbuf->submitCl.uniforms_size = clSize(&cmdbuf->uniformsCl);
/**/
printf("BCL:\n");
clDump(cmdbuf->submitCl.bin_cl, cmdbuf->submitCl.bin_cl_size);
printf("BO handles: ");
@ -331,6 +332,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
printf("clear z %u\n", cmdbuf->submitCl.clear_z);
printf("clear s %u\n", cmdbuf->submitCl.clear_s);
printf("flags %u\n", cmdbuf->submitCl.flags);
/**/
//submit ioctl

View File

@ -227,7 +227,7 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
//FS
uniform count : 1
tex sample count : 0
uniform constant : 4291579008
uniform constant : 4291579008 (color: #CC4C80, alpha FF)
//VS
uniform count : 4
@ -245,9 +245,12 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
uniform viewport xscale : 15360.000000
uniform viewport zoffset : 0.500000
/**/
clFit(commandBuffer, &commandBuffer->uniformsCl, 4*(1+4+4));
//TODO: if fragment shader doesn't use any uniforms, then VS will expect to read the first uniform in the stream
//clFit(commandBuffer, &commandBuffer->uniformsCl, 4*(1+4+4));
clFit(commandBuffer, &commandBuffer->uniformsCl, 4*(4+4));
//FS
clInsertUniformConstant(&commandBuffer->uniformsCl, 4291579008);
//clInsertUniformConstant(&commandBuffer->uniformsCl, 4291579008);
//VS
clInsertUniformConstant(&commandBuffer->uniformsCl, 1065353216);
clInsertUniformXYScale(&commandBuffer->uniformsCl, (float)(i->width) * 0.5f * 16.0f);

View File

@ -35,33 +35,52 @@ const char* vertShader =
const char fragBytes[] =
{
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
0x80, 0x7d, 0x82, 0x15, 0xa7, 0xb, 0x2, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x50
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
0x80, 0x7d, 0x82, 0x15, 0xa7, 0xb, 0x2, 0x10,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x50
};
const char vertBytes[] =
{
0x80, 0x1f, 0x82, 0x2, 0x27, 0x10, 0x2, 0xd0, 00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0,
0x37, 00, 0xc2, 0x20, 0xe0, 0x49, 00, 0x10, 0x7, 00, 0x9c, 0x20, 0xe1, 0x49, 00, 0x10,
0x77, 0x2, 0xc2, 0x27, 0x22, 0x40, 0x12, 0x10, 0x17, 00, 0x9c, 0x20, 0xe3, 0x49, 00, 0x10,
0xc0, 0x76, 0x9e, 0x7, 0x27, 00, 0x22, 0x10, 00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0,
0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10, 0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10,
0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
0x80, 0x1f, 0x82, 0x2, 0x27, 0x10, 0x2, 0xd0,
00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0,
0x37, 00, 0xc2, 0x20, 0xe0, 0x49, 00, 0x10,
0x7, 00, 0x9c, 0x20, 0xe1, 0x49, 00, 0x10,
0x77, 0x2, 0xc2, 0x27, 0x22, 0x40, 0x12, 0x10,
0x17, 00, 0x9c, 0x20, 0xe3, 0x49, 00, 0x10,
0xc0, 0x76, 0x9e, 0x7, 0x27, 00, 0x22, 0x10,
00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0,
0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10,
0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10,
0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0x10,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
};
const char coordinateBytes[] =
{
00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0, 0x80, 0x7d, 0xc2, 0x15, 0xa7, 0x8, 0x2, 0x10,
00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0, 0x92, 0x7d, 0xc2, 0x95, 0xf0, 0x48, 0x2, 0x10,
0xde, 0x76, 0x82, 0x35, 0x21, 0x4c, 0x2, 0x10, 0x80, 0x1f, 0x82, 0x2, 0xe7, 0x8, 0x2, 0xd0,
0x16, 0x70, 0x82, 0x20, 0xe2, 0x49, 00, 0x10, 0x13, 0x70, 0x9e, 0x20, 0xe0, 0x49, 00, 0x10,
0xb, 0x70, 0x9e, 0x27, 0x21, 0x40, 0x12, 0x10, 0x40, 0x72, 0x9e, 0x7, 0x27, 00, 0x22, 0x10,
0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0xd0, 0xc0, 0xf, 0x9e, 0x15, 0x27, 0xc, 0x2, 0xd0,
0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10, 0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10,
0xc0, 0x76, 0x9e, 0x15, 0x27, 0xc, 0x2, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0,
0x80, 0x7d, 0xc2, 0x15, 0xa7, 0x8, 0x2, 0x10,
00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0,
0x92, 0x7d, 0xc2, 0x95, 0xf0, 0x48, 0x2, 0x10,
0xde, 0x76, 0x82, 0x35, 0x21, 0x4c, 0x2, 0x10,
0x80, 0x1f, 0x82, 0x2, 0xe7, 0x8, 0x2, 0xd0,
0x16, 0x70, 0x82, 0x20, 0xe2, 0x49, 00, 0x10,
0x13, 0x70, 0x9e, 0x20, 0xe0, 0x49, 00, 0x10,
0xb, 0x70, 0x9e, 0x27, 0x21, 0x40, 0x12, 0x10,
0x40, 0x72, 0x9e, 0x7, 0x27, 00, 0x22, 0x10,
0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0xd0,
0xc0, 0xf, 0x9e, 0x15, 0x27, 0xc, 0x2, 0xd0,
0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10,
0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10,
0xc0, 0x76, 0x9e, 0x15, 0x27, 0xc, 0x2, 0x10,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
};
// Note: support swap chain recreation (not only required for resized windows!)
@ -848,10 +867,12 @@ VkShaderModule VulkanCreateShaderModule(VkDevice& device, char* byteStream, uint
void CreateShaders()
{
//clever: use small immediate -1 interpreted as 0xffffffff (white) to set color to white
//"sig_small_imm ; tlb_color_all = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;"
char asm_code[] =
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_load_imm ; r0 = load32.always(0xffffffff) ; nop = load32() ;"
"sig_load_imm ; r0 = load32.always(0xffa14ccc) ; nop = load32() ;"
"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
@ -881,14 +902,15 @@ void CreateShaders()
for(uint64_t c = 0; c < num_instructions; ++c)
{
printf("%#llx ", instructions[c]);
disassemble_qpu_asm(instructions[c]);
}
char* vptr = (char*)malloc(sizeof(vertBytes));
memcpy(vptr, vertBytes, sizeof(vertBytes));
//char* fptr = (char*)malloc(sizeof(fragBytes));
//memcpy(fptr, fragBytes, sizeof(fragBytes));
char* fptr = (char*)malloc(sizeof(fragBytes));
memcpy(fptr, fragBytes, sizeof(fragBytes));
char* cptr = (char*)malloc(sizeof(coordinateBytes));
memcpy(cptr, coordinateBytes, sizeof(coordinateBytes));
@ -930,7 +952,7 @@ void CreateShaders()
assert(fsModule);
free(vptr);
//free(fptr);
free(fptr);
free(cptr);
}