mirror of
https://github.com/Yours3lf/rpi-vk-driver.git
synced 2025-01-18 10:52:14 +01:00
using assembly as a fragment shader works
This commit is contained in:
parent
9283288a7a
commit
ef5227a2be
@ -1,4 +1,4 @@
|
||||
#include <stdio.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
@ -7,8 +7,13 @@
|
||||
int main()
|
||||
{
|
||||
char asm_code[] =
|
||||
"sig_none ; rx0 = add(r2, r3) ; rx1 = fmul(r2, r3) ;\n"
|
||||
"\0";
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_small_imm ; tlb_color_all = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;"
|
||||
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_unlock_score ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"\0";
|
||||
|
||||
printf("%s", asm_code);
|
||||
|
||||
@ -35,7 +40,7 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t* instruction_size = sizeof(uint64_t)*num_instructions;
|
||||
uint64_t instruction_size = sizeof(uint64_t)*num_instructions;
|
||||
uint64_t* instructions = malloc(instruction_size);
|
||||
|
||||
assemble_qpu_asm(asm_code, instructions);
|
||||
|
@ -718,7 +718,6 @@ void parse_args_alu(char** str, qpu_mux* in_a, qpu_mux* in_b, uint8_t* raddr_a,
|
||||
char* arg = strtok(*str, " \n\v\f\r\t,");
|
||||
|
||||
unsigned num_muxes = sizeof(qpu_mux_str) / sizeof(const char *);
|
||||
unsigned found = 0;
|
||||
|
||||
for(unsigned c = 0; c < num_muxes && arg; ++c)
|
||||
{
|
||||
@ -726,7 +725,6 @@ void parse_args_alu(char** str, qpu_mux* in_a, qpu_mux* in_b, uint8_t* raddr_a,
|
||||
{
|
||||
*str = arg;
|
||||
*in_a = c;
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -79,12 +79,6 @@ typedef enum {
|
||||
QPU_MUX_R5, //special purpose
|
||||
QPU_MUX_A,
|
||||
QPU_MUX_B,
|
||||
|
||||
/**
|
||||
* Non-hardware mux value, stores a small immediate field to be
|
||||
* programmed into raddr_b in the qpu_reg.index.
|
||||
*/
|
||||
QPU_MUX_SMALL_IMM,
|
||||
} qpu_mux;
|
||||
|
||||
static const char *qpu_mux_str[] = {
|
||||
@ -96,7 +90,6 @@ static const char *qpu_mux_str[] = {
|
||||
[QPU_MUX_R5] = "r5",
|
||||
[QPU_MUX_A] = "a",
|
||||
[QPU_MUX_B] = "b",
|
||||
[QPU_MUX_SMALL_IMM] = "imm",
|
||||
};
|
||||
|
||||
|
||||
@ -154,37 +147,37 @@ uint8_t qpu_encode_small_immediate(uint32_t i)
|
||||
return i + 32;
|
||||
|
||||
switch (i) {
|
||||
case 0x3f800000:
|
||||
case 0x3f800000: //1.0
|
||||
return 32;
|
||||
case 0x40000000:
|
||||
case 0x40000000: //2.0
|
||||
return 33;
|
||||
case 0x40800000:
|
||||
case 0x40800000: //4.0
|
||||
return 34;
|
||||
case 0x41000000:
|
||||
case 0x41000000: //8.0
|
||||
return 35;
|
||||
case 0x41800000:
|
||||
case 0x41800000: //16.0
|
||||
return 36;
|
||||
case 0x42000000:
|
||||
case 0x42000000: //32.0
|
||||
return 37;
|
||||
case 0x42800000:
|
||||
case 0x42800000: //64.0
|
||||
return 38;
|
||||
case 0x43000000:
|
||||
case 0x43000000: //128.0
|
||||
return 39;
|
||||
case 0x3b800000:
|
||||
case 0x3b800000: //1.0/256.0
|
||||
return 40;
|
||||
case 0x3c000000:
|
||||
case 0x3c000000: //1.0/128.0
|
||||
return 41;
|
||||
case 0x3c800000:
|
||||
case 0x3c800000: //1.0/64.0
|
||||
return 42;
|
||||
case 0x3d000000:
|
||||
case 0x3d000000: //1.0/32.0
|
||||
return 43;
|
||||
case 0x3d800000:
|
||||
case 0x3d800000: //1.0/16.0
|
||||
return 44;
|
||||
case 0x3e000000:
|
||||
case 0x3e000000: //1.0/8.0
|
||||
return 45;
|
||||
case 0x3e800000:
|
||||
case 0x3e800000: //1.0/4.0
|
||||
return 46;
|
||||
case 0x3f000000:
|
||||
case 0x3f000000: //1.0/2.0
|
||||
return 47;
|
||||
}
|
||||
|
||||
|
@ -312,6 +312,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
|
||||
cmdbuf->submitCl.uniforms = cmdbuf->uniformsCl.buffer;
|
||||
cmdbuf->submitCl.uniforms_size = clSize(&cmdbuf->uniformsCl);
|
||||
|
||||
/**/
|
||||
printf("BCL:\n");
|
||||
clDump(cmdbuf->submitCl.bin_cl, cmdbuf->submitCl.bin_cl_size);
|
||||
printf("BO handles: ");
|
||||
@ -331,6 +332,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
|
||||
printf("clear z %u\n", cmdbuf->submitCl.clear_z);
|
||||
printf("clear s %u\n", cmdbuf->submitCl.clear_s);
|
||||
printf("flags %u\n", cmdbuf->submitCl.flags);
|
||||
/**/
|
||||
|
||||
|
||||
//submit ioctl
|
||||
|
@ -227,7 +227,7 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
|
||||
//FS
|
||||
uniform count : 1
|
||||
tex sample count : 0
|
||||
uniform constant : 4291579008
|
||||
uniform constant : 4291579008 (color: #CC4C80, alpha FF)
|
||||
|
||||
//VS
|
||||
uniform count : 4
|
||||
@ -245,9 +245,12 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
|
||||
uniform viewport xscale : 15360.000000
|
||||
uniform viewport zoffset : 0.500000
|
||||
/**/
|
||||
clFit(commandBuffer, &commandBuffer->uniformsCl, 4*(1+4+4));
|
||||
|
||||
//TODO: if fragment shader doesn't use any uniforms, then VS will expect to read the first uniform in the stream
|
||||
//clFit(commandBuffer, &commandBuffer->uniformsCl, 4*(1+4+4));
|
||||
clFit(commandBuffer, &commandBuffer->uniformsCl, 4*(4+4));
|
||||
//FS
|
||||
clInsertUniformConstant(&commandBuffer->uniformsCl, 4291579008);
|
||||
//clInsertUniformConstant(&commandBuffer->uniformsCl, 4291579008);
|
||||
//VS
|
||||
clInsertUniformConstant(&commandBuffer->uniformsCl, 1065353216);
|
||||
clInsertUniformXYScale(&commandBuffer->uniformsCl, (float)(i->width) * 0.5f * 16.0f);
|
||||
|
@ -35,33 +35,52 @@ const char* vertShader =
|
||||
|
||||
const char fragBytes[] =
|
||||
{
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
|
||||
0x80, 0x7d, 0x82, 0x15, 0xa7, 0xb, 0x2, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x50
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
|
||||
0x80, 0x7d, 0x82, 0x15, 0xa7, 0xb, 0x2, 0x10,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x50
|
||||
};
|
||||
|
||||
const char vertBytes[] =
|
||||
{
|
||||
0x80, 0x1f, 0x82, 0x2, 0x27, 0x10, 0x2, 0xd0, 00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0,
|
||||
0x37, 00, 0xc2, 0x20, 0xe0, 0x49, 00, 0x10, 0x7, 00, 0x9c, 0x20, 0xe1, 0x49, 00, 0x10,
|
||||
0x77, 0x2, 0xc2, 0x27, 0x22, 0x40, 0x12, 0x10, 0x17, 00, 0x9c, 0x20, 0xe3, 0x49, 00, 0x10,
|
||||
0xc0, 0x76, 0x9e, 0x7, 0x27, 00, 0x22, 0x10, 00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0,
|
||||
0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10, 0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10,
|
||||
0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
|
||||
0x80, 0x1f, 0x82, 0x2, 0x27, 0x10, 0x2, 0xd0,
|
||||
00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0,
|
||||
0x37, 00, 0xc2, 0x20, 0xe0, 0x49, 00, 0x10,
|
||||
0x7, 00, 0x9c, 0x20, 0xe1, 0x49, 00, 0x10,
|
||||
0x77, 0x2, 0xc2, 0x27, 0x22, 0x40, 0x12, 0x10,
|
||||
0x17, 00, 0x9c, 0x20, 0xe3, 0x49, 00, 0x10,
|
||||
0xc0, 0x76, 0x9e, 0x7, 0x27, 00, 0x22, 0x10,
|
||||
00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0,
|
||||
0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10,
|
||||
0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10,
|
||||
0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0x10,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
|
||||
};
|
||||
|
||||
const char coordinateBytes[] =
|
||||
{
|
||||
00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0, 0x80, 0x7d, 0xc2, 0x15, 0xa7, 0x8, 0x2, 0x10,
|
||||
00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0, 0x92, 0x7d, 0xc2, 0x95, 0xf0, 0x48, 0x2, 0x10,
|
||||
0xde, 0x76, 0x82, 0x35, 0x21, 0x4c, 0x2, 0x10, 0x80, 0x1f, 0x82, 0x2, 0xe7, 0x8, 0x2, 0xd0,
|
||||
0x16, 0x70, 0x82, 0x20, 0xe2, 0x49, 00, 0x10, 0x13, 0x70, 0x9e, 0x20, 0xe0, 0x49, 00, 0x10,
|
||||
0xb, 0x70, 0x9e, 0x27, 0x21, 0x40, 0x12, 0x10, 0x40, 0x72, 0x9e, 0x7, 0x27, 00, 0x22, 0x10,
|
||||
0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0xd0, 0xc0, 0xf, 0x9e, 0x15, 0x27, 0xc, 0x2, 0xd0,
|
||||
0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10, 0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10,
|
||||
0xc0, 0x76, 0x9e, 0x15, 0x27, 0xc, 0x2, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10, 00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
|
||||
00, 0x1a, 0x20, 00, 0x67, 0x4c, 0x2, 0xe0,
|
||||
0x80, 0x7d, 0xc2, 0x15, 0xa7, 0x8, 0x2, 0x10,
|
||||
00, 0x1a, 00, 00, 0x67, 0x5c, 0x2, 0xe0,
|
||||
0x92, 0x7d, 0xc2, 0x95, 0xf0, 0x48, 0x2, 0x10,
|
||||
0xde, 0x76, 0x82, 0x35, 0x21, 0x4c, 0x2, 0x10,
|
||||
0x80, 0x1f, 0x82, 0x2, 0xe7, 0x8, 0x2, 0xd0,
|
||||
0x16, 0x70, 0x82, 0x20, 0xe2, 0x49, 00, 0x10,
|
||||
0x13, 0x70, 0x9e, 0x20, 0xe0, 0x49, 00, 0x10,
|
||||
0xb, 0x70, 0x9e, 0x27, 0x21, 0x40, 0x12, 0x10,
|
||||
0x40, 0x72, 0x9e, 0x7, 0x27, 00, 0x22, 0x10,
|
||||
0xc0, 0xf, 0x9c, 0x15, 0x27, 0xc, 0x2, 0xd0,
|
||||
0xc0, 0xf, 0x9e, 0x15, 0x27, 0xc, 0x2, 0xd0,
|
||||
0x80, 0x7d, 0x2, 0x15, 0x27, 0xc, 0x2, 0x10,
|
||||
0x80, 0x7d, 0x82, 0x15, 0x27, 0xc, 0x2, 0x10,
|
||||
0xc0, 0x76, 0x9e, 0x15, 0x27, 0xc, 0x2, 0x10,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x30,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
|
||||
00, 0x70, 0x9e, 00, 0xe7, 0x9, 00, 0x10,
|
||||
};
|
||||
|
||||
// Note: support swap chain recreation (not only required for resized windows!)
|
||||
@ -848,10 +867,12 @@ VkShaderModule VulkanCreateShaderModule(VkDevice& device, char* byteStream, uint
|
||||
|
||||
void CreateShaders()
|
||||
{
|
||||
//clever: use small immediate -1 interpreted as 0xffffffff (white) to set color to white
|
||||
//"sig_small_imm ; tlb_color_all = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;"
|
||||
|
||||
char asm_code[] =
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_load_imm ; r0 = load32.always(0xffffffff) ; nop = load32() ;"
|
||||
"sig_load_imm ; r0 = load32.always(0xffa14ccc) ; nop = load32() ;"
|
||||
"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
@ -881,14 +902,15 @@ void CreateShaders()
|
||||
|
||||
for(uint64_t c = 0; c < num_instructions; ++c)
|
||||
{
|
||||
printf("%#llx ", instructions[c]);
|
||||
disassemble_qpu_asm(instructions[c]);
|
||||
}
|
||||
|
||||
char* vptr = (char*)malloc(sizeof(vertBytes));
|
||||
memcpy(vptr, vertBytes, sizeof(vertBytes));
|
||||
|
||||
//char* fptr = (char*)malloc(sizeof(fragBytes));
|
||||
//memcpy(fptr, fragBytes, sizeof(fragBytes));
|
||||
char* fptr = (char*)malloc(sizeof(fragBytes));
|
||||
memcpy(fptr, fragBytes, sizeof(fragBytes));
|
||||
|
||||
char* cptr = (char*)malloc(sizeof(coordinateBytes));
|
||||
memcpy(cptr, coordinateBytes, sizeof(coordinateBytes));
|
||||
@ -930,7 +952,7 @@ void CreateShaders()
|
||||
assert(fsModule);
|
||||
|
||||
free(vptr);
|
||||
//free(fptr);
|
||||
free(fptr);
|
||||
free(cptr);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user