From f5ad0b14271e2e89ac6efcf130c0a9ccbc39e760 Mon Sep 17 00:00:00 2001 From: Yours3lf <0.tamas.marton@gmail.com> Date: Thu, 18 Jun 2020 20:21:41 +0100 Subject: [PATCH] Created Shader assembly loading (markdown) --- Shader-assembly-loading.md | 205 +++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 Shader-assembly-loading.md diff --git a/Shader-assembly-loading.md b/Shader-assembly-loading.md new file mode 100644 index 0000000..846b622 --- /dev/null +++ b/Shader-assembly-loading.md @@ -0,0 +1,205 @@ +Shader assembly and the corresponding assembly-to-descriptor (and push constant) mapping can be passed to the driver in the following way. Pay attention to the SPIR-V magic constants passed, as those signal to the driver that it's receiving assembly data. You need to include driver/vkExt.h for the structure definitions. +The QPU assembler included with the driver can be used to write assembly code in human-readable form and convert it to binary form; see QPUassembler/qpu_assembler.h/c. +``` +char vs_asm_code[] = +///0x40000000 = 2.0 +///uni = 1.0 +///rb0 = 2 - 1 = 1 +"sig_small_imm ; rx0 = fsub.ws.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0) ;\n" +///set up VPM read for subsequent reads +///0x00201a00: 0000 0000 0010 0000 0001 1010 0000 0000 +///addr: 0 +///size: 32bit +///packed +///horizontal +///stride=1 +///vectors to read = 2 (how many components) +"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n" +///uni = viewportXScale +///r0 = vpm * uni +"sig_none ; nop = nop(r0, r0, vpm_read, uni) ; r0 = fmul.always(a, b) ;\n" +///r1 = r0 * rb0 (1) +"sig_none ; nop = nop(r0, r0, nop, rb0) ; r1 = fmul.always(r0, b) ;\n" +///uni = viewportYScale +///ra0.16a = int(r1), r2 = vpm * uni +"sig_none ; rx0.16a = ftoi.always(r1, r1, vpm_read, uni) ; r2 = fmul.always(a, b) ;\n" +///r3 = r2 * rb0 +"sig_none ; nop = nop(r0, r0, nop, rb0) ; r3 = fmul.always(r2, b) ;\n" +///ra0.16b = int(r3) +"sig_none ; rx0.16b = ftoi.always(r3, 
r3) ; nop = nop(r0, r0) ;\n" +///set up VPM write for subsequent writes +///0x00001a00: 0000 0000 0000 0000 0001 1010 0000 0000 +///addr: 0 +///size: 32bit +///horizontal +///stride = 1 +"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n" +///shaded vertex format for PSE +/// Ys and Xs +///vpm = ra0 +"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0);\n" +/// Zs +///uni = 0.5 +///vpm = uni +"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0);\n" +/// 1.0 / Wc +///vpm = rb0 (1) +"sig_none ; vpm = or.always(b, b, nop, rb0) ; nop = nop(r0, r0);\n" +///END +"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n" +"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n" +"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n" + "\0"; + +char cs_asm_code[] = +///uni = 1.0 +///r3 = 2.0 - uni +"sig_small_imm ; r3 = fsub.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0);\n" +"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n" +///r2 = vpm +"sig_none ; r2 = or.always(a, a, vpm_read, nop) ; nop = nop(r0, r0);\n" +"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n" +///shaded coordinates format for PTB +/// write Xc +///r1 = vpm, vpm = r2 +"sig_none ; r1 = or.always(a, a, vpm_read, nop) ; vpm = v8min.always(r2, r2);\n" +/// write Yc +///uni = viewportXscale +///vpm = r1, r2 = r2 * uni +"sig_none ; vpm = or.always(r1, r1, uni, nop) ; r2 = fmul.always(r2, a);\n" +///uni = viewportYscale +///r1 = r1 * uni +"sig_none ; nop = nop(r0, r0, uni, nop) ; r1 = fmul.always(r1, a);\n" +///r0 = r2 * r3 +"sig_none ; nop = nop(r0, r0) ; r0 = fmul.always(r2, r3);\n" +///ra0.16a = r0, r1 = r1 * r3 +"sig_none ; rx0.16a = ftoi.always(r0, r0) ; r1 = fmul.always(r1, r3) ;\n" +///ra0.16b = r1 +"sig_none ; rx0.16b = ftoi.always(r1, r1) ; nop = nop(r0, r0) ;\n" +///write Zc +///vpm = 0 +"sig_small_imm ; vpm = or.always(b, b, nop, 0) ; nop = nop(r0, r0) ;\n" +///write Wc +///vpm = 1.0 
+"sig_small_imm ; vpm = or.always(b, b, nop, 0x3f800000) ; nop = nop(r0, r0) ;\n" +///write Ys and Xs +///vpm = ra0 +"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0) ;\n" +///write Zs +///uni = 0.5 +///vpm = uni +"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;\n" +///write 1/Wc +///vpm = r3 +"sig_none ; vpm = or.always(r3, r3) ; nop = nop(r0, r0) ;\n" +///END +"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n" +"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n" +"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n" + "\0"; + +//display a color +char fs_asm_code[] = +"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" +///BGRA +"sig_load_imm ; r0 = load32.always(0xffa14ccc) ; nop = load32() ;" +"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;" +"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" +"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" +"sig_unlock_score ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" + "\0"; + +VkRpiAssemblyMappingEXT vertexMappings[] = { + //vertex shader uniforms + { + VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT, + VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type + 0, //descriptor set # + 0, //descriptor binding # + 0, //descriptor array element # + 0, //resource offset + }, + { + VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT, + VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type + 0, //descriptor set # + 0, //descriptor binding # + 0, //descriptor array element # + 4, //resource offset + }, + { + VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT, + VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type + 0, //descriptor set # + 0, //descriptor binding # + 0, //descriptor array element # + 8, //resource offset + }, + { + VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT, + VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type + 0, //descriptor set # + 0, //descriptor binding # + 0, //descriptor array element # + 12, //resource offset + } +}; + +uint32_t spirv[6]; + +uint64_t* asm_ptrs[4] = {}; +uint32_t asm_sizes[4] = 
{}; + +VkRpiAssemblyMappingEXT* asm_mappings[4] = {}; +uint32_t asm_mappings_sizes[4] = {}; + +VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; +shaderModuleCreateInfo.instructions = asm_ptrs; +shaderModuleCreateInfo.numInstructions = asm_sizes; +shaderModuleCreateInfo.mappings = asm_mappings; +shaderModuleCreateInfo.numMappings = asm_mappings_sizes; + +asm_mappings[VK_RPI_ASSEMBLY_TYPE_VERTEX] = vertexMappings; +asm_mappings_sizes[VK_RPI_ASSEMBLY_TYPE_VERTEX] = sizeof(vertexMappings) / sizeof(VkRpiAssemblyMappingEXT); + +{ //assemble cs code + asm_sizes[0] = get_num_instructions(cs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[0]; + asm_ptrs[0] = (uint64_t*)malloc(size); + //modifies the passed string's contents + assemble_qpu_asm(cs_asm_code, asm_ptrs[0]); +} + +{ //assemble vs code + asm_sizes[1] = get_num_instructions(vs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[1]; + asm_ptrs[1] = (uint64_t*)malloc(size); + assemble_qpu_asm(vs_asm_code, asm_ptrs[1]); +} + +{ //assemble fs code + asm_sizes[2] = get_num_instructions(fs_asm_code); + uint32_t size = sizeof(uint64_t)*asm_sizes[2]; + asm_ptrs[2] = (uint64_t*)malloc(size); + assemble_qpu_asm(fs_asm_code, asm_ptrs[2]); +} + +spirv[0] = 0x07230203; +spirv[1] = 0x00010000; +spirv[2] = 0x14E45250; +spirv[3] = 1; +spirv[4] = (uint32_t)&shaderModuleCreateInfo; +//words start here +spirv[5] = 1 << 16; + +VkShaderModuleCreateInfo smci = {}; +smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; +smci.codeSize = sizeof(uint32_t)*6; +smci.pCode = spirv; +vkCreateShaderModule(device, &smci, 0, &shaderModule); + +for(uint32_t c = 0; c < 4; ++c) +{ + free(asm_ptrs[c]); +} +``` \ No newline at end of file