1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2024-12-01 13:24:20 +01:00
rpi-vk-driver/driver/shader.c
2020-04-21 21:53:11 +01:00

165 lines
3.7 KiB
C

#include "common.h"
#include "kernel/vc4_packet.h"
#include "QPUassembler/qpu_assembler.h"
#include "vkExt.h"
//TODO collect shader performance data
//eg number of texture samples etc.
//TODO check if shader has flow control and make sure instance also has flow control
//TODO make sure instance has threaded fs if shader contains thread switch
/*
 * https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateShaderModule
 *
 * Creates a shader module from pre-assembled QPU instructions.
 *
 * pCreateInfo->pCode is not interpreted as SPIR-V here: word 2 is compared
 * against a magic value and word 4 is reinterpreted as a pointer to a
 * VkRpiShaderModuleAssemblyCreateInfoEXT that carries, per assembly type,
 * the instruction stream and the optional mapping table.
 *
 * For every assembly type that has instructions the function
 *  - scans fragment code for thread switch signals (sets hasThreadSwitch),
 *  - counts reads of register address 35 into numVaryings,
 *  - prints a disassembly to stdout,
 *  - uploads the code into a shader BO via vc4_bo_alloc_shader.
 * Mapping tables, when supplied, are copied into the module.
 *
 * device and pAllocator are not used.
 *
 * Returns VK_SUCCESS and stores the module in *pShaderModule, or
 * VK_ERROR_OUT_OF_HOST_MEMORY if the magic does not match or a host
 * allocation fails. On failure everything allocated so far is released and
 * *pShaderModule is left untouched.
 */
VkResult rpi_vkCreateShaderModule(VkDevice device, const VkShaderModuleCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkShaderModule* pShaderModule)
{
	assert(pCreateInfo);
	assert(pCreateInfo->pCode);

	uint32_t magic = pCreateInfo->pCode[2];

	//the create info pointer is smuggled through a 32 bit code word,
	//make the integer to pointer conversion explicit
	//NOTE(review): this can only round-trip where pointers fit in 32 bits
	VkRpiShaderModuleAssemblyCreateInfoEXT* ci = (VkRpiShaderModuleAssemblyCreateInfoEXT*)(uintptr_t)pCreateInfo->pCode[4];

	//shader magic doesn't add up
	if(magic != 0x14E45250)
	{
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	assert(ci);
	assert(pShaderModule);
	assert(ci->instructions);

	_shaderModule* shader = ALLOCATE(sizeof(_shaderModule), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if(!shader)
	{
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	//start from a fully defined state so that no field is ever read
	//uninitialized and the failure path below can release safely
	shader->hasThreadSwitch = 0;
	shader->numVaryings = 0;

	for(int c = 0; c < VK_RPI_ASSEMBLY_TYPE_MAX; ++c)
	{
		shader->bos[c] = 0;
		shader->sizes[c] = 0;
		shader->numMappings[c] = 0;
		shader->mappings[c] = 0;
	}

	uint32_t hadVertex = 0, hadCoordinate = 0;

	for(int c = 0; c < VK_RPI_ASSEMBLY_TYPE_MAX; ++c)
	{
		if(ci->instructions[c])
		{
			if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
			{
				hadVertex = 1;
			}
			else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
			{
				hadCoordinate = 1;
			}

			if(c == VK_RPI_ASSEMBLY_TYPE_FRAGMENT)
			{
				for(uint64_t d = 0; d < ci->numInstructions[c]; ++d)
				{
					//signal bits live in the top nibble, shift first so that
					//no signed constant has to be shifted into the sign bit
					uint64_t s = (ci->instructions[c][d] >> 60) & 0xf;

					//2 and 6 are treated as thread switch signals
					if(s == 2 || s == 6)
					{
						shader->hasThreadSwitch = 1;
						break;
					}
				}
			}

			//NOTE(review): the counter restarts for every assembly type, so the
			//final value belongs to the last type that has instructions
			shader->numVaryings = 0;

			for(uint64_t d = 0; d < ci->numInstructions[c]; ++d)
			{
				const uint64_t instr = ci->instructions[c][d];

				unsigned is_sem = ((instr >> 57) & 0x7f) == 0x74;
				unsigned sig_bits = (unsigned)((instr >> 60) & 0xf);

				//if it's an ALU instruction
				if(!is_sem && sig_bits != 14 && sig_bits != 15)
				{
					unsigned raddr_a = (unsigned)((instr >> 18) & 0x3f);
					unsigned raddr_b = (unsigned)((instr >> 12) & 0x3f);

					if(raddr_a == 35)
					{
						shader->numVaryings++;
					}

					//don't count small immediates
					if(sig_bits != 13 && raddr_b == 35)
					{
						shader->numVaryings++;
					}
				}
			}

			shader->sizes[c] = ci->numInstructions[c]*sizeof(uint64_t);

			for(uint64_t e = 0; e < shader->sizes[c] / sizeof(uint64_t); ++e)
			{
				printf("%#llx ", (unsigned long long)ci->instructions[c][e]);
				disassemble_qpu_asm(ci->instructions[c][e]);
			}

			printf("\n");

			shader->bos[c] = vc4_bo_alloc_shader(controlFd, ci->instructions[c], &shader->sizes[c]);
		}

		if(ci->numMappings && ci->numMappings[c] > 0)
		{
			shader->mappings[c] = ALLOCATE(sizeof(VkRpiAssemblyMappingEXT)*ci->numMappings[c], 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

			if(!shader->mappings[c])
			{
				//release everything acquired so far instead of leaking it
				for(int i = 0; i < VK_RPI_ASSEMBLY_TYPE_MAX; ++i)
				{
					if(shader->bos[i])
					{
						vc4_bo_free(controlFd, shader->bos[i], 0, shader->sizes[i]);
					}

					if(shader->mappings[i])
					{
						FREE(shader->mappings[i]);
					}
				}

				FREE(shader);

				return VK_ERROR_OUT_OF_HOST_MEMORY;
			}

			memcpy(shader->mappings[c], ci->mappings[c], sizeof(VkRpiAssemblyMappingEXT)*ci->numMappings[c]);

			//only publish the count once the copy it describes exists
			shader->numMappings[c] = ci->numMappings[c];
		}
	}

	//vertex and coordinate code must either both be present or both be absent
	assert(hadVertex == hadCoordinate);

	*pShaderModule = shader;

	return VK_SUCCESS;
}
/*
 * Destroys a shader module created by rpi_vkCreateShaderModule.
 *
 * For every assembly type the shader BO (if any) is freed via vc4_bo_free
 * and the copied mapping table (if any) is released; afterwards the module
 * object itself is freed. A null shaderModule is ignored.
 *
 * pAllocator is not used.
 */
void rpi_vkDestroyShaderModule(VkDevice device, VkShaderModule shaderModule, const VkAllocationCallbacks* pAllocator)
{
	assert(device);

	_shaderModule* shader = shaderModule;

	if(!shader)
	{
		return;
	}

	for(int c = 0; c < VK_RPI_ASSEMBLY_TYPE_MAX; ++c)
	{
		if(shader->bos[c])
		{
			vc4_bo_free(controlFd, shader->bos[c], 0, shader->sizes[c]);
		}

		//mappings are allocated per assembly type, so they have to be freed
		//per assembly type as well; the mappings member itself is part of
		//the shader object and must never be passed to FREE
		if(shader->numMappings[c] > 0)
		{
			FREE(shader->mappings[c]);
		}
	}

	FREE(shader);
}