2019-09-30 22:30:37 +02:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include "common.h"
|
|
|
|
#include "QPUassembler/qpu_assembler.h"
|
|
|
|
#include "modeset.h"
|
|
|
|
#include "vkExtFunctions.h"
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
//TODO collect shader performance data
|
|
|
|
//e.g. number of texture samples, etc.
|
|
|
|
//TODO check if shader has flow control and make sure instance also has flow control
|
|
|
|
//TODO make sure instance has threaded fs if shader contains thread switch
|
|
|
|
|
2019-10-01 20:23:52 +02:00
|
|
|
/*
 * Builds a _shaderModule from QPU assembly strings.
 *
 * The create info is not passed as an argument: it is read from the
 * customData field of the physical device and interpreted as a
 * VkRpiShaderModuleAssemblyCreateInfoEXT.
 *
 * For every assembly type (0 .. RPI_ASSEMBLY_TYPE_MAX-1) that has a non-null
 * string in ci->asmStrings the function:
 *   - assembles the text into 64 bit instructions (on a scratch copy of the string),
 *   - scans the instructions for the thread switch signal and for varying reads,
 *   - prints the instructions and their disassembly to stdout,
 *   - uploads the code with vc4_bo_alloc_shader and stores the result in shader->bos.
 * Assembly types without a string get zeroed instructions/bos/sizes entries.
 * Finally the assembly mappings are copied and the module is written to
 * *ci->pShaderModule.
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if any host allocation
 * fails. On failure the host memory allocated here is released and
 * *ci->pShaderModule is left untouched.
 */
VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT(VkPhysicalDevice physicalDevice)
{
	assert(physicalDevice);

	_physicalDevice* ptr = physicalDevice;
	VkRpiShaderModuleAssemblyCreateInfoEXT* ci = ptr->customData;

	//validate the create info before anything is read through it
	assert(ci);
	assert(ci->pShaderModule);
	assert(ci->asmStrings);

	//not referenced directly below, presumably picked up by the ALLOCATE/FREE macros,
	//so the local has to keep this exact name
	const VkAllocationCallbacks* pAllocator = ci->pAllocator;

	_shaderModule* shader = ALLOCATE(sizeof(_shaderModule), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if(!shader)
	{
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	//start from a fully defined state so that the failure path below knows
	//exactly which buffers exist, and unused slots never hold garbage
	shader->hasThreadSwitch = 0;
	shader->numVaryings = 0;
	shader->mappings = 0;

	for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
	{
		shader->instructions[c] = 0;
		shader->bos[c] = 0;
		shader->sizes[c] = 0;
	}

	for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
	{
		if(!ci->asmStrings[c])
		{
			//no assembly for this type, its slots stay zeroed
			continue;
		}

		uint32_t numInstructions = get_num_instructions(ci->asmStrings[c]);
		uint32_t size = sizeof(uint64_t)*numInstructions;

		//TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
		//why not map kernel space mem to user space instead?
		shader->instructions[c] = ALLOCATE(size, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

		if(!shader->instructions[c])
		{
			goto outOfHostMemory;
		}

		//need to create a temporary copy as the assembly algorithm is destructive
		size_t stringLength = strlen(ci->asmStrings[c]);
		char* tmpShaderStr = ALLOCATE(stringLength+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

		if(!tmpShaderStr)
		{
			goto outOfHostMemory;
		}

		memcpy(tmpShaderStr, ci->asmStrings[c], stringLength+1);

		assemble_qpu_asm(tmpShaderStr, shader->instructions[c]);

		FREE(tmpShaderStr);

		//NOTE(review): the varying count is restarted for every assembly type that is present,
		//so the stored value describes the last assembled type only. Kept as is, confirm intent.
		shader->numVaryings = 0;

		for(uint32_t d = 0; d < numInstructions; ++d)
		{
			uint64_t instruction = shader->instructions[c][d];

			//shift first, then mask: the value stays unsigned and no
			//signed constant gets shifted into the sign bit
			unsigned sig_bits = (unsigned)((instruction >> 60) & 0xf);
			unsigned is_sem = ((instruction >> 57) & 0x7f) == 0x74;

			//signal value 2 marks a thread switch
			if(sig_bits == 2)
			{
				shader->hasThreadSwitch = 1;
			}

			//only ALU instructions are inspected for varying reads
			if(is_sem || sig_bits == 14 || sig_bits == 15)
			{
				continue;
			}

			unsigned raddr_a = (unsigned)((instruction >> 18) & 0x3f);
			unsigned raddr_b = (unsigned)((instruction >> 12) & 0x3f);

			//a read address of 35 is counted as one varying read
			if(raddr_a == 35)
			{
				shader->numVaryings++;
			}

			//don't count small immediates
			if(sig_bits != 13 && raddr_b == 35)
			{
				shader->numVaryings++;
			}
		}

		shader->sizes[c] = size;

		//dump the assembled code together with its disassembly
		for(uint32_t e = 0; e < numInstructions; ++e)
		{
			//cast so that the argument matches %llx on every platform
			printf("%#llx ", (unsigned long long)shader->instructions[c][e]);
			disassemble_qpu_asm(shader->instructions[c][e]);
		}
		printf("\n");

		//NOTE(review): the returned handle is stored unchecked, confirm how
		//vc4_bo_alloc_shader reports failure
		shader->bos[c] = vc4_bo_alloc_shader(controlFd, shader->instructions[c], &shader->sizes[c]);
	}

	shader->numMappings = ci->numMappings;

	if(ci->numMappings > 0)
	{
		shader->mappings = ALLOCATE(sizeof(VkRpiAssemblyMappingEXT)*ci->numMappings, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

		if(!shader->mappings)
		{
			goto outOfHostMemory;
		}

		memcpy(shader->mappings, ci->mappings, sizeof(VkRpiAssemblyMappingEXT)*ci->numMappings);
	}

	*ci->pShaderModule = shader;

	return VK_SUCCESS;

outOfHostMemory:
	//release every host allocation made above, the module is never handed to the caller
	//NOTE(review): buffer objects already created by vc4_bo_alloc_shader for earlier
	//assembly types are not released here, the matching release routine is not visible
	//in this part of the file
	for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
	{
		if(shader->instructions[c])
		{
			FREE(shader->instructions[c]);
		}
	}

	FREE(shader);

	return VK_ERROR_OUT_OF_HOST_MEMORY;
}
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|