mirror of
https://github.com/Yours3lf/rpi-vk-driver.git
synced 2025-02-26 23:54:17 +01:00
Created Shader assembly loading (markdown)
parent
a7c5351ddb
commit
f5ad0b1427
205
Shader-assembly-loading.md
Normal file
205
Shader-assembly-loading.md
Normal file
@ -0,0 +1,205 @@
|
||||
Shader assembly and the corresponding assembly-to-descriptor (and push constant) mapping can be passed to the driver in the following way. Pay attention to the SPIR-V magic constants that are passed, as they signal to the driver that it is receiving assembly data. You need to include driver/vkExt.h for the structure definitions.
|
||||
The QPU assembler included with the driver (QPUassembler/qpu_assembler.h/c) can be used to write assembly code in human-readable form and convert it to binary form.
|
||||
```
|
||||
//Vertex shader program as QPU assembly text. Each string literal holds one
//instruction made of three ';'-separated fields: a signal (sig_*) followed by
//two operations. The text is turned into binary further down with
//get_num_instructions() / assemble_qpu_asm().
//Going by the inline comments the program: derives the constant 1.0 into rb0,
//reads two components from the VPM, scales them by the viewport X/Y scale
//uniforms, converts them to integers packed into ra0.16a / ra0.16b, and then
//writes Ys/Xs, Zs and 1/Wc to the VPM.
//`uni` is read four times (1.0, viewportXScale, viewportYScale, 0.5), which
//matches the four entries of vertexMappings.
//NOTE(review): presumably each `uni` read consumes the next mapping in order -
//confirm against the driver.
char vs_asm_code[] =
|
||||
///0x40000000 = 2.0
|
||||
///uni = 1.0
|
||||
///rb0 = 2 - 1 = 1
|
||||
"sig_small_imm ; rx0 = fsub.ws.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0) ;\n"
|
||||
///set up VPM read for subsequent reads
|
||||
///0x00201a00: 0000 0000 0010 0000 0001 1010 0000 0000
|
||||
///addr: 0
|
||||
///size: 32bit
|
||||
///packed
|
||||
///horizontal
|
||||
///stride=1
|
||||
///vectors to read = 2 (how many components)
|
||||
"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n"
|
||||
///uni = viewportXScale
|
||||
///r0 = vpm * uni
|
||||
"sig_none ; nop = nop(r0, r0, vpm_read, uni) ; r0 = fmul.always(a, b) ;\n"
|
||||
///r1 = r0 * rb0 (1)
|
||||
"sig_none ; nop = nop(r0, r0, nop, rb0) ; r1 = fmul.always(r0, b) ;\n"
|
||||
///uni = viewportYScale
|
||||
///ra0.16a = int(r1), r2 = vpm * uni
|
||||
"sig_none ; rx0.16a = ftoi.always(r1, r1, vpm_read, uni) ; r2 = fmul.always(a, b) ;\n"
|
||||
///r3 = r2 * rb0
|
||||
"sig_none ; nop = nop(r0, r0, nop, rb0) ; r3 = fmul.always(r2, b) ;\n"
|
||||
///ra0.16b = int(r3)
|
||||
"sig_none ; rx0.16b = ftoi.always(r3, r3) ; nop = nop(r0, r0) ;\n"
|
||||
///set up VPM write for subsequent writes
|
||||
///0x00001a00: 0000 0000 0000 0000 0001 1010 0000 0000
|
||||
///addr: 0
|
||||
///size: 32bit
|
||||
///horizontal
|
||||
///stride = 1
|
||||
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
|
||||
///shaded vertex format for PSE
|
||||
/// Ys and Xs
|
||||
///vpm = ra0
|
||||
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0);\n"
|
||||
/// Zs
|
||||
///uni = 0.5
|
||||
///vpm = uni
|
||||
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0);\n"
|
||||
/// 1.0 / Wc
|
||||
///vpm = rb0 (1)
|
||||
"sig_none ; vpm = or.always(b, b, nop, rb0) ; nop = nop(r0, r0);\n"
|
||||
///END
|
||||
///sig_end is followed by two more sig_none instructions
|
||||
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
|
||||
//explicit extra NUL; the compiler already NUL-terminates the literal
"\0";
|
||||
|
||||
//Second QPU assembly program, assembled into slot 0 further down ("assemble cs
//code"). Same text format as vs_asm_code: one instruction per string literal,
//three ';'-separated fields each.
//Going by the inline comments the program: derives 1.0 into r3, reads two
//components from the VPM, writes them out unmodified as Xc / Yc, scales them
//by the viewport X/Y scale uniforms, packs the integer results into
//ra0.16a / ra0.16b, and writes Zc, Wc, Ys/Xs, Zs and 1/Wc to the VPM.
//`uni` is read four times (1.0, viewportXscale, viewportYscale, 0.5).
//NOTE(review): the snippet only registers mappings for
//VK_RPI_ASSEMBLY_TYPE_VERTEX; which mappings feed this program's uniform
//reads is not visible here - confirm against the driver.
char cs_asm_code[] =
|
||||
///uni = 1.0
|
||||
///r3 = 2.0 - uni
|
||||
"sig_small_imm ; r3 = fsub.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0);\n"
|
||||
///set up VPM read (same setup word 0x00201a00 as in vs_asm_code)
"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n"
|
||||
///r2 = vpm
|
||||
"sig_none ; r2 = or.always(a, a, vpm_read, nop) ; nop = nop(r0, r0);\n"
|
||||
///set up VPM write (same setup word 0x00001a00 as in vs_asm_code)
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
|
||||
///shaded coordinates format for PTB
|
||||
/// write Xc
|
||||
///r1 = vpm, vpm = r2
|
||||
"sig_none ; r1 = or.always(a, a, vpm_read, nop) ; vpm = v8min.always(r2, r2);\n"
|
||||
/// write Yc
|
||||
///uni = viewportXscale
|
||||
///vpm = r1, r2 = r2 * uni
|
||||
"sig_none ; vpm = or.always(r1, r1, uni, nop) ; r2 = fmul.always(r2, a);\n"
|
||||
///uni = viewportYscale
|
||||
///r1 = r1 * uni
|
||||
"sig_none ; nop = nop(r0, r0, uni, nop) ; r1 = fmul.always(r1, a);\n"
|
||||
///r0 = r2 * r3
|
||||
"sig_none ; nop = nop(r0, r0) ; r0 = fmul.always(r2, r3);\n"
|
||||
///ra0.16a = int(r0), r1 = r1 * r3
|
||||
"sig_none ; rx0.16a = ftoi.always(r0, r0) ; r1 = fmul.always(r1, r3) ;\n"
|
||||
///ra0.16b = int(r1)
|
||||
"sig_none ; rx0.16b = ftoi.always(r1, r1) ; nop = nop(r0, r0) ;\n"
|
||||
///write Zc
|
||||
///vpm = 0
|
||||
"sig_small_imm ; vpm = or.always(b, b, nop, 0) ; nop = nop(r0, r0) ;\n"
|
||||
///write Wc
|
||||
///vpm = 1.0
|
||||
"sig_small_imm ; vpm = or.always(b, b, nop, 0x3f800000) ; nop = nop(r0, r0) ;\n"
|
||||
///write Ys and Xs
|
||||
///vpm = ra0
|
||||
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0) ;\n"
|
||||
///write Zs
|
||||
///uni = 0.5
|
||||
///vpm = uni
|
||||
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;\n"
|
||||
///write 1/Wc
|
||||
///vpm = r3
|
||||
"sig_none ; vpm = or.always(r3, r3) ; nop = nop(r0, r0) ;\n"
|
||||
///END
|
||||
///sig_end is followed by two more sig_none instructions
|
||||
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
|
||||
//explicit extra NUL; the compiler already NUL-terminates the literal
"\0";
|
||||
|
||||
//display a color
//Fragment shader program as QPU assembly text: loads one 32-bit constant into
//r0 and writes it to tlb_color_all, then ends.
//NOTE(review): unlike vs_asm_code / cs_asm_code these string literals carry no
//trailing "\n", so the whole program is one line of text once concatenated -
//confirm the assembler does not rely on newlines as separators.
|
||||
char fs_asm_code[] =
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
///BGRA
|
||||
"sig_load_imm ; r0 = load32.always(0xffa14ccc) ; nop = load32() ;"
|
||||
"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
///END: here the last of the two trailing instructions is sig_unlock_score
///instead of sig_none
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_unlock_score ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
//explicit extra NUL; the compiler already NUL-terminates the literal
"\0";
|
||||
|
||||
//Uniform mapping table registered below for VK_RPI_ASSEMBLY_TYPE_VERTEX.
//It holds four entries, all of type push constant, at resource offsets
//0, 4, 8 and 12 (i.e. four consecutive 4-byte values).
//The descriptor type/set/binding/array-element fields are filled with
//placeholder values (VK_DESCRIPTOR_TYPE_MAX_ENUM, 0, 0, 0) in every entry.
//NOTE(review): presumably those fields are ignored for push-constant
//mappings - confirm against driver/vkExt.h.
VkRpiAssemblyMappingEXT vertexMappings[] = {
|
||||
//vertex shader uniforms
|
||||
{
|
||||
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT, //mapping type
|
||||
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
|
||||
0, //descriptor set #
|
||||
0, //descriptor binding #
|
||||
0, //descriptor array element #
|
||||
0, //resource offset
|
||||
},
|
||||
{
|
||||
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT, //mapping type
|
||||
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
|
||||
0, //descriptor set #
|
||||
0, //descriptor binding #
|
||||
0, //descriptor array element #
|
||||
4, //resource offset
|
||||
},
|
||||
{
|
||||
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT, //mapping type
|
||||
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
|
||||
0, //descriptor set #
|
||||
0, //descriptor binding #
|
||||
0, //descriptor array element #
|
||||
8, //resource offset
|
||||
},
|
||||
{
|
||||
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT, //mapping type
|
||||
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
|
||||
0, //descriptor set #
|
||||
0, //descriptor binding #
|
||||
0, //descriptor array element #
|
||||
12, //resource offset
|
||||
}
|
||||
};
|
||||
|
||||
//Storage for the 6-word pseudo SPIR-V blob that is filled in further down and
//handed to vkCreateShaderModule.
uint32_t spirv[6];
|
||||
|
||||
//Per-program arrays with four slots each, all zero-initialised.
//Only slots 0..2 (cs, vs, fs) are filled in below; slot 3 stays NULL / 0.
//asm_ptrs holds the assembled 64-bit instructions, asm_sizes the instruction
//counts.
uint64_t* asm_ptrs[4] = {};
|
||||
uint32_t asm_sizes[4] = {};
|
||||
|
||||
//Per-program uniform mapping tables and their element counts.
VkRpiAssemblyMappingEXT* asm_mappings[4] = {};
|
||||
uint32_t asm_mappings_sizes[4] = {};
|
||||
|
||||
//The create info only stores pointers to the four arrays above, so the arrays
//can still be filled in after this point.
VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {};
|
||||
shaderModuleCreateInfo.instructions = asm_ptrs;
|
||||
shaderModuleCreateInfo.numInstructions = asm_sizes;
|
||||
shaderModuleCreateInfo.mappings = asm_mappings;
|
||||
shaderModuleCreateInfo.numMappings = asm_mappings_sizes;
|
||||
|
||||
//Register the push-constant mappings for the vertex program; the element
//count is derived from the array size.
asm_mappings[VK_RPI_ASSEMBLY_TYPE_VERTEX] = vertexMappings;
|
||||
asm_mappings_sizes[VK_RPI_ASSEMBLY_TYPE_VERTEX] = sizeof(vertexMappings) / sizeof(VkRpiAssemblyMappingEXT);
|
||||
|
||||
//Assemble cs_asm_code into slot 0: count the instructions, allocate one
//64-bit word per instruction, then let the assembler fill the buffer.
{ //assemble cs code
|
||||
asm_sizes[0] = get_num_instructions(cs_asm_code);
|
||||
uint32_t size = sizeof(uint64_t)*asm_sizes[0];
|
||||
//NOTE(review): the malloc result is not checked before it is passed on
asm_ptrs[0] = (uint64_t*)malloc(size);
|
||||
//modifies the passed string's contents
|
||||
assemble_qpu_asm(cs_asm_code, asm_ptrs[0]);
|
||||
}
|
||||
|
||||
//Assemble vs_asm_code into slot 1: count the instructions, allocate one
//64-bit word per instruction, then let the assembler fill the buffer.
//NOTE(review): slot 1 is presumably VK_RPI_ASSEMBLY_TYPE_VERTEX (the index
//used for the mappings above) - confirm against driver/vkExt.h.
{ //assemble vs code
|
||||
asm_sizes[1] = get_num_instructions(vs_asm_code);
|
||||
uint32_t size = sizeof(uint64_t)*asm_sizes[1];
|
||||
//NOTE(review): the malloc result is not checked before it is passed on
asm_ptrs[1] = (uint64_t*)malloc(size);
|
||||
//as noted for the cs code above, this modifies the passed string's contents
assemble_qpu_asm(vs_asm_code, asm_ptrs[1]);
|
||||
}
|
||||
|
||||
//Assemble fs_asm_code into slot 2: count the instructions, allocate one
//64-bit word per instruction, then let the assembler fill the buffer.
{ //assemble fs code
|
||||
asm_sizes[2] = get_num_instructions(fs_asm_code);
|
||||
uint32_t size = sizeof(uint64_t)*asm_sizes[2];
|
||||
//NOTE(review): the malloc result is not checked before it is passed on
asm_ptrs[2] = (uint64_t*)malloc(size);
|
||||
//as noted for the cs code above, this modifies the passed string's contents
assemble_qpu_asm(fs_asm_code, asm_ptrs[2]);
|
||||
}
|
||||
|
||||
//Fill the pseudo SPIR-V blob. The first five words occupy the positions of a
//regular SPIR-V header (magic, version, generator, bound, schema).
//word 0: the standard SPIR-V magic number
spirv[0] = 0x07230203;
|
||||
//word 1: in a SPIR-V header this is the version word (here 1.0)
spirv[1] = 0x00010000;
|
||||
//word 2: generator slot; presumably the driver-specific magic that marks the
//blob as carrying assembly - confirm against the driver
spirv[2] = 0x14E45250;
|
||||
//word 3: bound slot; NOTE(review): what the driver expects here is not
//visible in this snippet
spirv[3] = 1;
|
||||
//word 4: address of the assembly create info, cast down to 32 bits.
//NOTE(review): this only round-trips where pointers are 32 bits wide; on a
//64-bit build the address would be truncated.
spirv[4] = (uint32_t)&shaderModuleCreateInfo;
|
||||
//words start here
|
||||
//NOTE(review): 1 << 16 looks like a single one-word instruction with opcode 0
//in SPIR-V encoding (word count in the high half) - confirm
spirv[5] = 1 << 16;
|
||||
|
||||
//Create the shader module from the 6-word blob through the regular Vulkan
//entry point. codeSize is given in bytes (6 words of 4 bytes each).
//`device` and `shaderModule` are not declared in this snippet and must come
//from the surrounding code.
VkShaderModuleCreateInfo smci = {};
|
||||
smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
|
||||
smci.codeSize = sizeof(uint32_t)*6;
|
||||
smci.pCode = spirv;
|
||||
//NOTE(review): the VkResult returned here is ignored
vkCreateShaderModule(device, &smci, 0, &shaderModule);
|
||||
|
||||
//Release the assembled instruction buffers. All four slots are visited; slot 3
//was never allocated and is still NULL from the zero-initialisation, which
//free() accepts as a no-op.
//NOTE(review): the buffers are released right after vkCreateShaderModule,
//which presumably means the driver copies the instructions during that call -
//confirm.
for(uint32_t c = 0; c < 4; ++c)
|
||||
{
|
||||
free(asm_ptrs[c]);
|
||||
}
|
||||
```
|
Loading…
x
Reference in New Issue
Block a user