mirror of
https://github.com/Yours3lf/rpi-vk-driver.git
synced 2024-11-28 10:24:15 +01:00
2
Shader assembly loading
Yours3lf edited this page 2020-06-18 20:22:08 +01:00
Shader assembly and corresponding assemlby-to-descriptor (and push constant) mapping can be passed to the driver the following way.
Pay attention to the SPIR-V magic constants passed as those signal the driver that it's receiving assembly data.
You need to include driver/vkExt.h for the structure definitions.
The QPU assembler included with the driver can be used to write assembly code in human readable code and convert it to binary form. QPUassembler/qpu_assembler.h/c
char vs_asm_code[] =
///0x40000000 = 2.0
///uni = 1.0
///rb0 = 2 - 1 = 1
"sig_small_imm ; rx0 = fsub.ws.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0) ;\n"
///set up VPM read for subsequent reads
///0x00201a00: 0000 0000 0010 0000 0001 1010 0000 0000
///addr: 0
///size: 32bit
///packed
///horizontal
///stride=1
///vectors to read = 2 (how many components)
"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n"
///uni = viewportXScale
///r0 = vpm * uni
"sig_none ; nop = nop(r0, r0, vpm_read, uni) ; r0 = fmul.always(a, b) ;\n"
///r1 = r0 * rb0 (1)
"sig_none ; nop = nop(r0, r0, nop, rb0) ; r1 = fmul.always(r0, b) ;\n"
///uni = viewportYScale
///ra0.16a = int(r1), r2 = vpm * uni
"sig_none ; rx0.16a = ftoi.always(r1, r1, vpm_read, uni) ; r2 = fmul.always(a, b) ;\n"
///r3 = r2 * rb0
"sig_none ; nop = nop(r0, r0, nop, rb0) ; r3 = fmul.always(r2, b) ;\n"
///ra0.16b = int(r3)
"sig_none ; rx0.16b = ftoi.always(r3, r3) ; nop = nop(r0, r0) ;\n"
///set up VPM write for subsequent writes
///0x00001a00: 0000 0000 0000 0000 0001 1010 0000 0000
///addr: 0
///size: 32bit
///horizontal
///stride = 1
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
///shaded vertex format for PSE
/// Ys and Xs
///vpm = ra0
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0);\n"
/// Zs
///uni = 0.5
///vpm = uni
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0);\n"
/// 1.0 / Wc
///vpm = rb0 (1)
"sig_none ; vpm = or.always(b, b, nop, rb0) ; nop = nop(r0, r0);\n"
///END
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"\0";
char cs_asm_code[] =
///uni = 1.0
///r3 = 2.0 - uni
"sig_small_imm ; r3 = fsub.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0);\n"
"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n"
///r2 = vpm
"sig_none ; r2 = or.always(a, a, vpm_read, nop) ; nop = nop(r0, r0);\n"
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
///shaded coordinates format for PTB
/// write Xc
///r1 = vpm, vpm = r2
"sig_none ; r1 = or.always(a, a, vpm_read, nop) ; vpm = v8min.always(r2, r2);\n"
/// write Yc
///uni = viewportXscale
///vpm = r1, r2 = r2 * uni
"sig_none ; vpm = or.always(r1, r1, uni, nop) ; r2 = fmul.always(r2, a);\n"
///uni = viewportYscale
///r1 = r1 * uni
"sig_none ; nop = nop(r0, r0, uni, nop) ; r1 = fmul.always(r1, a);\n"
///r0 = r2 * r3
"sig_none ; nop = nop(r0, r0) ; r0 = fmul.always(r2, r3);\n"
///ra0.16a = r0, r1 = r1 * r3
"sig_none ; rx0.16a = ftoi.always(r0, r0) ; r1 = fmul.always(r1, r3) ;\n"
///ra0.16b = r1
"sig_none ; rx0.16b = ftoi.always(r1, r1) ; nop = nop(r0, r0) ;\n"
///write Zc
///vpm = 0
"sig_small_imm ; vpm = or.always(b, b, nop, 0) ; nop = nop(r0, r0) ;\n"
///write Wc
///vpm = 1.0
"sig_small_imm ; vpm = or.always(b, b, nop, 0x3f800000) ; nop = nop(r0, r0) ;\n"
///write Ys and Xs
///vpm = ra0
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0) ;\n"
///write Zs
///uni = 0.5
///vpm = uni
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;\n"
///write 1/Wc
///vpm = r3
"sig_none ; vpm = or.always(r3, r3) ; nop = nop(r0, r0) ;\n"
///END
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"\0";
//display a color
char fs_asm_code[] =
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
///BGRA
"sig_load_imm ; r0 = load32.always(0xffa14ccc) ; nop = load32() ;"
"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_unlock_score ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"\0";
VkRpiAssemblyMappingEXT vertexMappings[] = {
//vertex shader uniforms
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
4, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
8, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
12, //resource offset
}
};
uint32_t spirv[6];
uint64_t* asm_ptrs[4] = {};
uint32_t asm_sizes[4] = {};
VkRpiAssemblyMappingEXT* asm_mappings[4] = {};
uint32_t asm_mappings_sizes[4] = {};
VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {};
shaderModuleCreateInfo.instructions = asm_ptrs;
shaderModuleCreateInfo.numInstructions = asm_sizes;
shaderModuleCreateInfo.mappings = asm_mappings;
shaderModuleCreateInfo.numMappings = asm_mappings_sizes;
asm_mappings[VK_RPI_ASSEMBLY_TYPE_VERTEX] = vertexMappings;
asm_mappings_sizes[VK_RPI_ASSEMBLY_TYPE_VERTEX] = sizeof(vertexMappings) / sizeof(VkRpiAssemblyMappingEXT);
{ //assemble cs code
asm_sizes[0] = get_num_instructions(cs_asm_code);
uint32_t size = sizeof(uint64_t)*asm_sizes[0];
asm_ptrs[0] = (uint64_t*)malloc(size);
//modifies the passed string's contents
assemble_qpu_asm(cs_asm_code, asm_ptrs[0]);
}
{ //assemble vs code
asm_sizes[1] = get_num_instructions(vs_asm_code);
uint32_t size = sizeof(uint64_t)*asm_sizes[1];
asm_ptrs[1] = (uint64_t*)malloc(size);
assemble_qpu_asm(vs_asm_code, asm_ptrs[1]);
}
{ //assemble fs code
asm_sizes[2] = get_num_instructions(fs_asm_code);
uint32_t size = sizeof(uint64_t)*asm_sizes[2];
asm_ptrs[2] = (uint64_t*)malloc(size);
assemble_qpu_asm(fs_asm_code, asm_ptrs[2]);
}
spirv[0] = 0x07230203;
spirv[1] = 0x00010000;
spirv[2] = 0x14E45250;
spirv[3] = 1;
spirv[4] = (uint32_t)&shaderModuleCreateInfo;
//words start here
spirv[5] = 1 << 16;
VkShaderModuleCreateInfo smci = {};
smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
smci.codeSize = sizeof(uint32_t)*6;
smci.pCode = spirv;
vkCreateShaderModule(device, &smci, 0, &shaderModule);
for(uint32_t c = 0; c < 4; ++c)
{
free(asm_ptrs[c]);
}