1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2024-12-12 00:08:54 +01:00
rpi-vk-driver/driver/vkExtFunctions.c
2019-09-30 21:30:37 +01:00

141 lines
3.6 KiB
C

#pragma once
#include "common.h"
#include "QPUassembler/qpu_assembler.h"
#include "modeset.h"
#include "vkExtFunctions.h"
#ifdef __cplusplus
extern "C" {
#endif
//TODO collect shader performance data
//eg number of texture samples etc.
//TODO check if shader has flow control and make sure instance also has flow control
//TODO make sure instance has threaded fs if shader contains thread switch
VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT(VkPhysicalDevice physicalDevice,
VkRpiShaderModuleAssemblyCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkShaderModule* pShaderModule)
{
assert(physicalDevice);
assert(pCreateInfo);
assert(pShaderModule);
assert(pCreateInfo->asmStrings);
_shaderModule* shader = ALLOCATE(sizeof(_shaderModule), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!shader)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
shader->hasThreadSwitch = 0;
for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
{
if(pCreateInfo->asmStrings[c])
{
uint32_t numInstructions = get_num_instructions(pCreateInfo->asmStrings[c]);
uint32_t size = sizeof(uint64_t)*numInstructions;
//TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
//why not map kernel space mem to user space instead?
shader->instructions[c] = ALLOCATE(size, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!shader->instructions[c])
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
//need to create a temporary copy as the assembly algorithm is destructive
uint32_t stringLength = strlen(pCreateInfo->asmStrings[c]);
char* tmpShaderStr = ALLOCATE(stringLength+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
memcpy(tmpShaderStr, pCreateInfo->asmStrings[c], stringLength+1);
assemble_qpu_asm(tmpShaderStr, shader->instructions[c]);
FREE(tmpShaderStr);
for(uint64_t d = 0; d < numInstructions; ++d)
{
uint64_t s = (shader->instructions[c][d] & (0xfll << 60)) >> 60;
if(s == 2ll)
{
shader->hasThreadSwitch = 1;
break;
}
}
shader->numVaryings = 0;
for(uint64_t d = 0; d < numInstructions; ++d)
{
unsigned is_sem = ((shader->instructions[c][d] & (0x7fll << 57)) >> 57) == 0x74;
unsigned sig_bits = ((shader->instructions[c][d] & (0xfll << 60)) >> 60);
//if it's an ALU instruction
if(!is_sem && sig_bits != 14 && sig_bits != 15)
{
unsigned raddr_a = ((shader->instructions[c][d] & (0x3fll << 18)) >> 18);
unsigned raddr_b = ((shader->instructions[c][d] & (0x3fll << 12)) >> 12);
if(raddr_a == 35)
{
shader->numVaryings++;
}
//don't count small immediates
if(sig_bits != 13 && raddr_b == 35)
{
shader->numVaryings++;
}
}
}
shader->sizes[c] = size;
}
else
{
shader->bos[c] = 0;
shader->sizes[c] = 0;
}
}
shader->numMappings = pCreateInfo->numMappings;
if(pCreateInfo->numMappings > 0)
{
shader->mappings = ALLOCATE(sizeof(VkRpiAssemblyMappingEXT)*pCreateInfo->numMappings, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!shader->mappings)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(shader->mappings, pCreateInfo->mappings, sizeof(VkRpiAssemblyMappingEXT)*pCreateInfo->numMappings);
}
*pShaderModule = shader;
return VK_SUCCESS;
}
/*
* Implementation of our RPI specific "extension"
*/
VkResult rpi_vkCreateRpiSurfaceEXT(
VkPhysicalDevice physicalDevice)
{
assert(physicalDevice);
//TODO use allocator!
_physicalDevice* ptr = physicalDevice;
*(VkSurfaceKHR*)ptr->customData = (VkSurfaceKHR)modeset_create(controlFd);
return VK_SUCCESS;
}
#ifdef __cplusplus
}
#endif