/*
 * Mirror of https://github.com/Yours3lf/rpi-vk-driver.git
 * (synced 2024-12-10 22:24:14 +01:00; 435 lines, 11 KiB, C)
 */
#include "common.h"
|
|
|
|
#include "kernel/vc4_packet.h"
|
|
|
|
#include "QPUassembler/qpu_assembler.h"
|
|
#include "QPUassembler/vc4_qpu_enums.h"
|
|
#include "QPUassembler/vc4_qpu_defines.h"
|
|
|
|
#include "vkExt.h"
|
|
|
|
//TODO check if shader has flow control and make sure instance also has flow control
|
|
|
|
/*
|
|
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateShaderModule
|
|
*/
|
|
VkResult rpi_vkCreateShaderModule(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkShaderModule* pShaderModule)
|
|
{
|
|
uint32_t magic = pCreateInfo->pCode[2];
|
|
VkRpiShaderModuleAssemblyCreateInfoEXT* ci = pCreateInfo->pCode[4];
|
|
|
|
//shader magic doesn't add up
|
|
if(magic != 0x14E45250)
|
|
{
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
assert(ci);
|
|
assert(pShaderModule);
|
|
assert(ci->instructions);
|
|
|
|
_shaderModule* shader = ALLOCATE(sizeof(_shaderModule), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
|
|
if(!shader)
|
|
{
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
shader->hasThreadSwitch = 0;
|
|
shader->numTextureSamples = 0;
|
|
shader->numVaryings = 0;
|
|
shader->numFragUniformReads = 0;
|
|
shader->numVertUniformReads = 0;
|
|
shader->numCoordUniformReads = 0;
|
|
shader->numVertVPMreads = 0;
|
|
shader->numCoordVPMreads = 0;
|
|
shader->numVertVPMwrites = 0;
|
|
shader->numCoordVPMwrites = 0;
|
|
shader->numFragCycles = 0;
|
|
shader->numVertCycles = 0;
|
|
shader->numCoordCycles = 0;
|
|
shader->numFragALUcycles = 0;
|
|
shader->numVertALUcycles = 0;
|
|
shader->numCoordALUcycles = 0;
|
|
shader->numEmptyFragALUinstructions = 0;
|
|
shader->numEmptyVertALUinstructions = 0;
|
|
shader->numEmptyCoordALUinstructions = 0;
|
|
shader->numFragBranches = 0;
|
|
shader->numVertBranches = 0;
|
|
shader->numCoordBranches = 0;
|
|
shader->numFragSFUoperations = 0;
|
|
shader->numVertSFUoperations = 0;
|
|
shader->numCoordSFUoperations = 0;
|
|
|
|
uint32_t hadVertex = 0, hadCoordinate = 0;
|
|
|
|
for(int c = 0; c < VK_RPI_ASSEMBLY_TYPE_MAX; ++c)
|
|
{
|
|
if(ci->instructions[c])
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
hadVertex = 1;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
hadCoordinate = 1;
|
|
}
|
|
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_FRAGMENT)
|
|
{
|
|
for(uint64_t d = 0; d < ci->numInstructions[c]; ++d)
|
|
{
|
|
uint64_t s = (ci->instructions[c][d] & (0xfll << QPU_SIG_SHIFT)) >> QPU_SIG_SHIFT;
|
|
if(s == QPU_SIG_THREAD_SWITCH || s == QPU_SIG_LAST_THREAD_SWITCH)
|
|
{
|
|
shader->hasThreadSwitch = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
uint32_t numTMUwrites = 0;
|
|
for(uint64_t d = 0; d < ci->numInstructions[c]; ++d)
|
|
{
|
|
unsigned is_sem = ((ci->instructions[c][d] & (0x7fll << 57)) >> 57) == 0x74;
|
|
unsigned sig_bits = ((ci->instructions[c][d] & (0xfll << QPU_SIG_SHIFT)) >> QPU_SIG_SHIFT);
|
|
|
|
unsigned waddr_add = ((ci->instructions[c][d] & (0x3fll << QPU_WADDR_ADD_SHIFT)) >> QPU_WADDR_ADD_SHIFT);
|
|
unsigned waddr_mul = ((ci->instructions[c][d] & (0x3fll << QPU_WADDR_MUL_SHIFT)) >> QPU_WADDR_MUL_SHIFT);
|
|
|
|
if(waddr_add == QPU_W_SFU_RECIP ||
|
|
waddr_add == QPU_W_SFU_RECIPSQRT ||
|
|
waddr_add == QPU_W_SFU_EXP ||
|
|
waddr_add == QPU_W_SFU_LOG)
|
|
{
|
|
shader->numFragSFUoperations++;
|
|
}
|
|
|
|
if(waddr_mul == QPU_W_SFU_RECIP ||
|
|
waddr_mul == QPU_W_SFU_RECIPSQRT ||
|
|
waddr_mul == QPU_W_SFU_EXP ||
|
|
waddr_mul == QPU_W_SFU_LOG)
|
|
{
|
|
shader->numFragSFUoperations++;
|
|
}
|
|
|
|
if(waddr_add == QPU_W_TMU0_S ||
|
|
waddr_add == QPU_W_TMU0_T ||
|
|
waddr_add == QPU_W_TMU0_R ||
|
|
waddr_add == QPU_W_TMU0_B ||
|
|
waddr_add == QPU_W_TMU1_S ||
|
|
waddr_add == QPU_W_TMU1_T ||
|
|
waddr_add == QPU_W_TMU1_R ||
|
|
waddr_add == QPU_W_TMU1_B)
|
|
{
|
|
numTMUwrites++;
|
|
}
|
|
|
|
if(waddr_mul == QPU_W_TMU0_S ||
|
|
waddr_mul == QPU_W_TMU0_T ||
|
|
waddr_mul == QPU_W_TMU0_R ||
|
|
waddr_mul == QPU_W_TMU0_B ||
|
|
waddr_mul == QPU_W_TMU1_S ||
|
|
waddr_mul == QPU_W_TMU1_T ||
|
|
waddr_mul == QPU_W_TMU1_R ||
|
|
waddr_mul == QPU_W_TMU1_B
|
|
)
|
|
{
|
|
numTMUwrites++;
|
|
}
|
|
|
|
if(!is_sem && (sig_bits == QPU_SIG_LOAD_TMU0 || sig_bits == QPU_SIG_LOAD_TMU1))
|
|
{
|
|
shader->numTextureSamples++;
|
|
shader->numFragUniformReads++;
|
|
}
|
|
|
|
//if it's an ALU instruction
|
|
if(!is_sem && sig_bits != QPU_SIG_LOAD_IMM && sig_bits != QPU_SIG_BRANCH)
|
|
{
|
|
shader->numFragALUcycles++;
|
|
|
|
if(waddr_add == QPU_W_NOP)
|
|
{
|
|
shader->numEmptyFragALUinstructions++;
|
|
}
|
|
|
|
if(waddr_mul == QPU_W_NOP)
|
|
{
|
|
shader->numEmptyFragALUinstructions++;
|
|
}
|
|
|
|
unsigned raddr_a = ((ci->instructions[c][d] & (0x3fll << QPU_RADDR_A_SHIFT)) >> QPU_RADDR_A_SHIFT);
|
|
unsigned raddr_b = ((ci->instructions[c][d] & (0x3fll << QPU_RADDR_B_SHIFT)) >> QPU_RADDR_B_SHIFT);
|
|
|
|
if(raddr_a == QPU_R_VARY)
|
|
{
|
|
shader->numVaryings++;
|
|
}
|
|
else if(raddr_a == QPU_R_UNIF)
|
|
{
|
|
shader->numFragUniformReads++;
|
|
}
|
|
|
|
//don't count small immediates
|
|
if(sig_bits != QPU_SIG_SMALL_IMM && raddr_b == QPU_R_VARY)
|
|
{
|
|
shader->numVaryings++;
|
|
}
|
|
else if(sig_bits != QPU_SIG_SMALL_IMM && raddr_b == QPU_R_UNIF)
|
|
{
|
|
shader->numFragUniformReads++;
|
|
}
|
|
}
|
|
else if(!is_sem && sig_bits == QPU_SIG_BRANCH)
|
|
{
|
|
shader->numFragBranches++;
|
|
}
|
|
}
|
|
|
|
if(numTMUwrites > 1)
|
|
{
|
|
shader->numFragUniformReads += numTMUwrites;
|
|
}
|
|
}
|
|
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX || c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
for(uint64_t d = 0; d < ci->numInstructions[c]; ++d)
|
|
{
|
|
unsigned waddr_add = ((ci->instructions[c][d] & (0x3fll << QPU_WADDR_ADD_SHIFT)) >> QPU_WADDR_ADD_SHIFT);
|
|
unsigned waddr_mul = ((ci->instructions[c][d] & (0x3fll << QPU_WADDR_MUL_SHIFT)) >> QPU_WADDR_MUL_SHIFT);
|
|
|
|
unsigned is_sem = ((ci->instructions[c][d] & (0x7fll << 57)) >> 57) == 0x74;
|
|
unsigned sig_bits = ((ci->instructions[c][d] & (0xfll << QPU_SIG_SHIFT)) >> QPU_SIG_SHIFT);
|
|
|
|
if(waddr_add == QPU_W_SFU_RECIP ||
|
|
waddr_add == QPU_W_SFU_RECIPSQRT ||
|
|
waddr_add == QPU_W_SFU_EXP ||
|
|
waddr_add == QPU_W_SFU_LOG)
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numVertSFUoperations++;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numCoordSFUoperations++;
|
|
}
|
|
}
|
|
|
|
if(waddr_mul == QPU_W_SFU_RECIP ||
|
|
waddr_mul == QPU_W_SFU_RECIPSQRT ||
|
|
waddr_mul == QPU_W_SFU_EXP ||
|
|
waddr_mul == QPU_W_SFU_LOG)
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numVertSFUoperations++;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numCoordSFUoperations++;
|
|
}
|
|
}
|
|
|
|
if(waddr_add == QPU_W_VPM || waddr_mul == QPU_W_VPM)
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numVertVPMwrites++;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numCoordVPMwrites++;
|
|
}
|
|
}
|
|
|
|
//if it's an ALU instruction
|
|
if(!is_sem && sig_bits != QPU_SIG_LOAD_IMM && sig_bits != QPU_SIG_BRANCH)
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numVertALUcycles++;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numCoordALUcycles++;
|
|
}
|
|
|
|
if(waddr_add == QPU_W_NOP)
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numEmptyVertALUinstructions++;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numEmptyCoordALUinstructions++;
|
|
}
|
|
}
|
|
|
|
if(waddr_mul == QPU_W_NOP)
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numEmptyVertALUinstructions++;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numEmptyCoordALUinstructions++;
|
|
}
|
|
}
|
|
|
|
unsigned raddr_a = ((ci->instructions[c][d] & (0x3fll << QPU_RADDR_A_SHIFT)) >> QPU_RADDR_A_SHIFT);
|
|
unsigned raddr_b = ((ci->instructions[c][d] & (0x3fll << QPU_RADDR_B_SHIFT)) >> QPU_RADDR_B_SHIFT);
|
|
|
|
if(raddr_a == QPU_R_VPM)
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numVertVPMreads++;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numCoordVPMreads++;
|
|
}
|
|
}
|
|
else if(raddr_a == QPU_R_UNIF)
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numVertUniformReads++;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numCoordUniformReads++;
|
|
}
|
|
}
|
|
|
|
//don't count small immediates
|
|
if(sig_bits != QPU_SIG_SMALL_IMM && raddr_b == QPU_R_VPM)
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numVertVPMreads++;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numCoordVPMreads++;
|
|
}
|
|
}
|
|
else if(sig_bits != QPU_SIG_SMALL_IMM && raddr_b == QPU_R_UNIF)
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numVertUniformReads++;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numCoordUniformReads++;
|
|
}
|
|
}
|
|
}
|
|
else if(!is_sem && sig_bits == QPU_SIG_BRANCH)
|
|
{
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numVertBranches++;
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numCoordBranches++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if(c == VK_RPI_ASSEMBLY_TYPE_FRAGMENT)
|
|
{
|
|
shader->numFragCycles = ci->numInstructions[c];
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
|
|
{
|
|
shader->numVertCycles = ci->numInstructions[c];
|
|
}
|
|
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
|
|
{
|
|
shader->numCoordCycles = ci->numInstructions[c];
|
|
}
|
|
|
|
shader->sizes[c] = ci->numInstructions[c]*sizeof(uint64_t);
|
|
|
|
/**
|
|
for(uint64_t e = 0; e < shader->sizes[c] / 8; ++e)
|
|
{
|
|
printf("%#llx ", ci->instructions[c][e]);
|
|
disassemble_qpu_asm(ci->instructions[c][e]);
|
|
}
|
|
printf("\n");
|
|
/**/
|
|
|
|
shader->bos[c] = vc4_bo_alloc_shader(controlFd, ci->instructions[c], &shader->sizes[c]);
|
|
assert(shader->bos[c]);
|
|
}
|
|
else
|
|
{
|
|
shader->bos[c] = 0;
|
|
shader->sizes[c] = 0;
|
|
}
|
|
|
|
if(ci->numMappings)
|
|
{
|
|
shader->numMappings[c] = ci->numMappings[c];
|
|
|
|
if(ci->numMappings[c] > 0)
|
|
{
|
|
shader->mappings[c] = ALLOCATE(sizeof(VkRpiAssemblyMappingEXT)*ci->numMappings[c], 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
|
|
if(!shader->mappings[c])
|
|
{
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
memcpy(shader->mappings[c], ci->mappings[c], sizeof(VkRpiAssemblyMappingEXT)*ci->numMappings[c]);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
shader->numMappings[c] = 0;
|
|
shader->mappings[c] = 0;
|
|
}
|
|
}
|
|
|
|
assert(hadVertex == hadCoordinate);
|
|
|
|
*pShaderModule = shader;
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
/*
 * Destroys a shader module: frees the shader buffer object and the mappings
 * array of every assembly type, then the module itself. A null shaderModule
 * is ignored.
 */
void rpi_vkDestroyShaderModule(VkDevice device, VkShaderModule shaderModule, const VkAllocationCallbacks* pAllocator)
{
	assert(device);

	_shaderModule* module = shaderModule;

	//nothing to release for a null handle
	if(!module)
	{
		return;
	}

	for(int stage = 0; stage < VK_RPI_ASSEMBLY_TYPE_MAX; ++stage)
	{
		//only assembly types that had instructions own a buffer object
		if(module->bos[stage])
		{
			vc4_bo_free(controlFd, module->bos[stage], 0, module->sizes[stage]);
		}

		//the mappings pointer is only trusted when the count says there is a copy
		if(module->numMappings[stage]>0)
		{
			FREE(module->mappings[stage]);
		}
	}

	FREE(module);
}
|