diff --git a/driver/common.h b/driver/common.h
index c33243f..707e097 100644
--- a/driver/common.h
+++ b/driver/common.h
@@ -83,6 +83,10 @@ typedef struct VkInstance_T _instance;
 typedef struct VkPhysicalDevice_T
 {
 	VK_LOADER_DATA loaderData;
+	//apparently unknown physical device extensions can't quite pass anything other than VkPhysicalDevice
+	//now that object has to have the loader magic
+	//so we just provide a custom data pointer so that our extensions can be used...
+	void* customData;
 	//hardware id?
 	char* path;
 	_instance* instance;
diff --git a/driver/declarations.h b/driver/declarations.h
index 46bd098..6d0f81a 100644
--- a/driver/declarations.h
+++ b/driver/declarations.h
@@ -948,11 +948,6 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkGetDescriptorSetLayoutSupport(
 	const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
 	VkDescriptorSetLayoutSupport* pSupport);
 
-VKAPI_ATTR void VKAPI_CALL rpi_vkDestroySurfaceKHR(
-	VkInstance instance,
-	VkSurfaceKHR surface,
-	const VkAllocationCallbacks* pAllocator);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/driver/instance.c b/driver/instance.c
index f6f4807..59da11e 100644
--- a/driver/instance.c
+++ b/driver/instance.c
@@ -29,18 +29,29 @@ VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance in
 
 	void* ptr = rpi_vkGetInstanceProcAddr(instance, pName);
 
-	fprintf(stderr, "-----------------rpi_vkGetInstanceProcAddr: %s, %p\n", pName, ptr);
 	return ptr;
 }
 
 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(VkInstance instance, const char* pName)
 {
-	fprintf(stderr, "-----------------vk_icdGetPhysicalDeviceProcAddr: %s\n", pName);
+	//looks up our RPI specific physical device "extension" entry points by name
+	if(!pName)
+	{
+		return 0;
+	}
 
-	RETFUNC(vkGetRpiExtensionPointerEXT);
+	//function pointers are cast to the generic function pointer type, not routed through void*
+	if(!strcmp(pName, "vkCreateRpiSurfaceEXT"))
+	{
+		return (PFN_vkVoidFunction)&rpi_vkCreateRpiSurfaceEXT;
+	}
+
+	if(!strcmp(pName, "vkCreateShaderModuleFromRpiAssemblyEXT"))
+	{
+		return (PFN_vkVoidFunction)&rpi_vkCreateShaderModuleFromRpiAssemblyEXT;
+	}
 
 	return 0;
 }
 
 /*
diff --git a/driver/shader.c b/driver/shader.c
index 8cf6994..319d41d 100644
--- 
a/driver/shader.c +++ b/driver/shader.c @@ -6,116 +6,6 @@ #include "vkExt.h" -//TODO collect shader performance data -//eg number of texture samples etc. -//TODO check if shader has flow control and make sure instance also has flow control -//TODO make sure instance has threaded fs if shader contains thread switch - -VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device, - VkRpiShaderModuleAssemblyCreateInfoEXT* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkShaderModule* pShaderModule) -{ - assert(device); - assert(pCreateInfo); - assert(pShaderModule); - assert(pCreateInfo->asmStrings); - - _shaderModule* shader = ALLOCATE(sizeof(_shaderModule), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if(!shader) - { - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - shader->hasThreadSwitch = 0; - - for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c) - { - if(pCreateInfo->asmStrings[c]) - { - uint32_t numInstructions = get_num_instructions(pCreateInfo->asmStrings[c]); - uint32_t size = sizeof(uint64_t)*numInstructions; - //TODO this alloc feels kinda useless, we just copy the data anyway to kernel space - //why not map kernel space mem to user space instead? 
- shader->instructions[c] = ALLOCATE(size, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if(!shader->instructions[c]) - { - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - //need to create a temporary copy as the assembly algorithm is destructive - uint32_t stringLength = strlen(pCreateInfo->asmStrings[c]); - char* tmpShaderStr = ALLOCATE(stringLength+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - memcpy(tmpShaderStr, pCreateInfo->asmStrings[c], stringLength+1); - - assemble_qpu_asm(tmpShaderStr, shader->instructions[c]); - - FREE(tmpShaderStr); - - for(uint64_t d = 0; d < numInstructions; ++d) - { - uint64_t s = (shader->instructions[c][d] & (0xfll << 60)) >> 60; - if(s == 2ll) - { - shader->hasThreadSwitch = 1; - break; - } - } - - shader->numVaryings = 0; - for(uint64_t d = 0; d < numInstructions; ++d) - { - unsigned is_sem = ((shader->instructions[c][d] & (0x7fll << 57)) >> 57) == 0x74; - unsigned sig_bits = ((shader->instructions[c][d] & (0xfll << 60)) >> 60); - - //if it's an ALU instruction - if(!is_sem && sig_bits != 14 && sig_bits != 15) - { - unsigned raddr_a = ((shader->instructions[c][d] & (0x3fll << 18)) >> 18); - unsigned raddr_b = ((shader->instructions[c][d] & (0x3fll << 12)) >> 12); - - if(raddr_a == 35) - { - shader->numVaryings++; - } - - //don't count small immediates - if(sig_bits != 13 && raddr_b == 35) - { - shader->numVaryings++; - } - } - } - - shader->sizes[c] = size; - } - else - { - shader->bos[c] = 0; - shader->sizes[c] = 0; - } - } - - shader->numMappings = pCreateInfo->numMappings; - - if(pCreateInfo->numMappings > 0) - { - shader->mappings = ALLOCATE(sizeof(VkRpiAssemblyMappingEXT)*pCreateInfo->numMappings, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if(!shader->mappings) - { - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - memcpy(shader->mappings, pCreateInfo->mappings, sizeof(VkRpiAssemblyMappingEXT)*pCreateInfo->numMappings); - } - - *pShaderModule = shader; - - return VK_SUCCESS; -} - /* * 
https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateShaderModule
  */
diff --git a/driver/vkExt.h b/driver/vkExt.h
index 5311c8a..84432b7 100644
--- a/driver/vkExt.h
+++ b/driver/vkExt.h
@@ -1,5 +1,34 @@
 #pragma once
 
+#include <vulkan/vulkan.h> //NOTE(review): the header name was missing from this line, confirm
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//the create info struct is defined further down in this header
+struct VkRpiShaderModuleAssemblyCreateInfoEXT;
+
+//function pointer types for the entry points declared in vkExtFunctions.h
+typedef VkResult (*PFN_vkCreateRpiSurfaceEXT)(
+	VkPhysicalDevice physicalDevice);
+typedef VkResult (*PFN_vkCreateShaderModuleFromRpiAssemblyEXT)(
+	VkPhysicalDevice physicalDevice,
+	struct VkRpiShaderModuleAssemblyCreateInfoEXT* pCreateInfo,
+	const VkAllocationCallbacks* pAllocator,
+	VkShaderModule* pShaderModule);
+//previous spelling of the typedef above, kept so code using it still compiles
+typedef PFN_vkCreateShaderModuleFromRpiAssemblyEXT PFN__vkCreateShaderModuleFromRpiAssemblyEXT;
+
+//application-side view of the start of the driver's physical device object:
+//dummy overlays the loader data, customData overlays the driver's customData
+//NOTE(review): assumes VK_LOADER_DATA is exactly pointer sized -- confirm
+typedef struct VkRpiPhysicalDevice
+{
+	void* dummy;
+	void* customData;
+} VkRpiPhysicalDevice;
+
 //we need something like the other platforms to create surfaces on the RPI
 //so I created this little "extension"
 //full spec in this file ;)
@@ -67,3 +96,8 @@ typedef struct VkRpiShaderModuleAssemblyCreateInfoEXT {
 	VkRpiAssemblyMappingEXT* mappings;
 	uint32_t numMappings;
 } VkRpiShaderModuleAssemblyCreateInfoEXT;
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/driver/vkExtFunctions.c b/driver/vkExtFunctions.c
new file mode 100644
index 0000000..8b149fb
--- /dev/null
+++ b/driver/vkExtFunctions.c
@@ -0,0 +1,140 @@
+//implementations of our RPI specific "extension" entry points
+
+#include "common.h"
+#include "QPUassembler/qpu_assembler.h"
+#include "modeset.h"
+#include "vkExtFunctions.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//TODO collect shader performance data
+//eg number of texture samples etc.
+//TODO check if shader has flow control and make sure instance also has flow control
+//TODO make sure instance has threaded fs if shader contains thread switch
+
+//frees a partially built shader module: the instruction buffers of the first
+//numTypes assembly types (where one was allocated) and the module itself
+static void freePartialShaderModule(_shaderModule* shader, int numTypes)
+{
+	for(int c = 0; c < numTypes; ++c)
+	{
+		if(shader->instructions[c])
+		{
+			FREE(shader->instructions[c]);
+		}
+	}
+
+	FREE(shader);
+}
+
+/*
+ * Creates a shader module from QPU assembly text.
+ * Every non-null pCreateInfo->asmStrings[c] is assembled into shader->instructions[c]
+ * and its size in bytes is stored in shader->sizes[c]; missing programs get size 0.
+ * While scanning the assembled instructions the module records whether any of them
+ * carries the thread switch signal and how many varyings the ALU instructions read.
+ * The assembly mappings are copied into the module.
+ * physicalDevice is only asserted and pAllocator is not used.
+ * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if an allocation fails, in which
+ * case everything allocated so far is released and *pShaderModule is left untouched.
+ */
+VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT(VkPhysicalDevice physicalDevice,
+		VkRpiShaderModuleAssemblyCreateInfoEXT* pCreateInfo,
+		const VkAllocationCallbacks* pAllocator,
+		VkShaderModule* pShaderModule)
+{
+	assert(physicalDevice);
+	assert(pCreateInfo);
+	assert(pShaderModule);
+	assert(pCreateInfo->asmStrings);
+
+	_shaderModule* shader = ALLOCATE(sizeof(_shaderModule), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+	if(!shader)
+	{
+		return VK_ERROR_OUT_OF_HOST_MEMORY;
+	}
+
+	//give every field a defined value even if there are no programs or mappings
+	shader->hasThreadSwitch = 0;
+	shader->numVaryings = 0;
+	shader->mappings = 0;
+
+	for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
+	{
+		if(!pCreateInfo->asmStrings[c])
+		{
+			//no program of this type
+			shader->instructions[c] = 0;
+			shader->bos[c] = 0;
+			shader->sizes[c] = 0;
+			continue;
+		}
+
+		uint32_t numInstructions = get_num_instructions(pCreateInfo->asmStrings[c]);
+		uint32_t size = sizeof(uint64_t)*numInstructions;
+		//TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
+		//why not map kernel space mem to user space instead?
+		shader->instructions[c] = ALLOCATE(size, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+		if(!shader->instructions[c])
+		{
+			freePartialShaderModule(shader, c);
+			return VK_ERROR_OUT_OF_HOST_MEMORY;
+		}
+
+		//need to create a temporary copy as the assembly algorithm is destructive
+		uint32_t stringLength = strlen(pCreateInfo->asmStrings[c]);
+		char* tmpShaderStr = ALLOCATE(stringLength+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+		if(!tmpShaderStr)
+		{
+			//instructions[c] is already allocated, so release it too
+			freePartialShaderModule(shader, c+1);
+			return VK_ERROR_OUT_OF_HOST_MEMORY;
+		}
+		memcpy(tmpShaderStr, pCreateInfo->asmStrings[c], stringLength+1);
+
+		assemble_qpu_asm(tmpShaderStr, shader->instructions[c]);
+
+		FREE(tmpShaderStr);
+
+		//NOTE(review): the count restarts for every program, so it ends up
+		//describing the last non-null assembly type only -- confirm this is intended
+		shader->numVaryings = 0;
+		for(uint32_t d = 0; d < numInstructions; ++d)
+		{
+			//shift first and mask afterwards: the old (0xfll << 60) style masks
+			//overflowed a signed long long, which is undefined behaviour
+			uint64_t instr = shader->instructions[c][d];
+			unsigned sig_bits = (unsigned)((instr >> 60) & 0xf);
+			unsigned is_sem = ((instr >> 57) & 0x7f) == 0x74;
+
+			//signal 2 marks a thread switch
+			if(sig_bits == 2)
+			{
+				shader->hasThreadSwitch = 1;
+			}
+
+			//if it's an ALU instruction
+			if(!is_sem && sig_bits != 14 && sig_bits != 15)
+			{
+				unsigned raddr_a = (unsigned)((instr >> 18) & 0x3f);
+				unsigned raddr_b = (unsigned)((instr >> 12) & 0x3f);
+
+				//read address 35 is counted as a varying read
+				if(raddr_a == 35)
+				{
+					shader->numVaryings++;
+				}
+
+				//don't count small immediates
+				if(sig_bits != 13 && raddr_b == 35)
+				{
+					shader->numVaryings++;
+				}
+			}
+		}
+
+		shader->sizes[c] = size;
+	}
+
+	shader->numMappings = pCreateInfo->numMappings;
+
+	if(pCreateInfo->numMappings > 0)
+	{
+		assert(pCreateInfo->mappings);
+
+		shader->mappings = ALLOCATE(sizeof(VkRpiAssemblyMappingEXT)*pCreateInfo->numMappings, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+		if(!shader->mappings)
+		{
+			freePartialShaderModule(shader, RPI_ASSEMBLY_TYPE_MAX);
+			return VK_ERROR_OUT_OF_HOST_MEMORY;
+		}
+
+		memcpy(shader->mappings, pCreateInfo->mappings, sizeof(VkRpiAssemblyMappingEXT)*pCreateInfo->numMappings);
+	}
+
+	*pShaderModule = shader;
+
+	return VK_SUCCESS;
+}
+
+/*
+ * Implementation of our RPI specific "extension"
+ */
+VkResult rpi_vkCreateRpiSurfaceEXT(
+		VkPhysicalDevice physicalDevice)
+{
+	//only the VkPhysicalDevice reaches us, so the surface handle is written
+	//through the VkSurfaceKHR pointer the caller stored in customData beforehand
+	assert(physicalDevice);
+
+	//TODO use allocator!
+
+	_physicalDevice* ptr = physicalDevice;
+	VkSurfaceKHR* pSurface = (VkSurfaceKHR*)ptr->customData;
+
+	if(!pSurface)
+	{
+		//nowhere to store the handle, so don't create a surface we could not hand out
+		return VK_ERROR_INITIALIZATION_FAILED;
+	}
+
+	*pSurface = (VkSurfaceKHR)modeset_create(controlFd);
+
+	return VK_SUCCESS;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/driver/vkExtFunctions.h b/driver/vkExtFunctions.h
index 0ec20e8..66c52ae 100644
--- a/driver/vkExtFunctions.h
+++ b/driver/vkExtFunctions.h
@@ -9,37 +9,16 @@ extern "C" {
 //extension name something like: VK_KHR_rpi_surface
 //extension that allows developers to create a surface to render to on Raspbian Stretch Lite
 extern VkResult rpi_vkCreateRpiSurfaceEXT(
-	VkInstance instance,
-	const VkRpiSurfaceCreateInfoEXT* pCreateInfo,
-	const VkAllocationCallbacks* pAllocator,
-	VkSurfaceKHR* pSurface);
+	VkPhysicalDevice physicalDevice);
 
 //extension that allows developers to submit QPU assembly directly and thus hand optimise code
 extern VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT(
-	VkDevice device,
+	VkPhysicalDevice physicalDevice,
 	VkRpiShaderModuleAssemblyCreateInfoEXT* pCreateInfo,
 	const VkAllocationCallbacks* pAllocator,
 	VkShaderModule* pShaderModule
 	);
 
-extern void* _getFuncPtr(const char* name);
-
-static VkResult rpi_vkGetRpiExtensionPointerEXT(
-	VkPhysicalDevice physicalDevice
-	)
-{
-	//TODO how do we handle our "custom" extensions towards the loader????
- - uint32_t ret = 0;//(uint32_t)_getFuncPtr((const char*)physicalDevice); - - if(ret) - { - return (VkResult)ret; - } - - return VK_SUCCESS; -} - //TODO performance counters / perfmon diff --git a/driver/wsi.c b/driver/wsi.c index 622de16..552c7a7 100644 --- a/driver/wsi.c +++ b/driver/wsi.c @@ -5,56 +5,13 @@ #include "declarations.h" -#include "vkExtFunctions.h" - - -#ifdef __cplusplus -extern "C" { -#endif - -void* _getFuncPtr(const char* name) -{ - if(!strcmp(name, "vkCreateRpiSurfaceEXT")) - return (void*)&rpi_vkCreateRpiSurfaceEXT; - - if(!strcmp(name, "vkCreateShaderModuleFromRpiAssemblyEXT")) - return (void*)&rpi_vkCreateShaderModuleFromRpiAssemblyEXT; - - return 0; -} - -#ifdef __cplusplus -} -#endif - - -/* - * Implementation of our RPI specific "extension" - */ -VkResult rpi_vkCreateRpiSurfaceEXT( - VkInstance instance, - const VkRpiSurfaceCreateInfoEXT* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSurfaceKHR* pSurface) -{ - assert(instance); - //assert(pCreateInfo); //ignored for now - assert(pSurface); - - //TODO use allocator! - - *pSurface = (VkSurfaceKHR)modeset_create(controlFd); - - return VK_SUCCESS; -} - /* * https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySurfaceKHR * Destroying a VkSurfaceKHR merely severs the connection between Vulkan and the native surface, * and does not imply destroying the native surface, closing a window, or similar behavior * (but we'll do so anyways...) 
*/ -VKAPI_ATTR void VKAPI_CALL rpi_vkDestroySurfaceKHR( +VKAPI_ATTR void VKAPI_CALL vkDestroySurfaceKHR( VkInstance instance, VkSurfaceKHR surface, const VkAllocationCallbacks* pAllocator) @@ -79,7 +36,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkDestroySurfaceKHR( * * capabilities the specified device supports for a swapchain created for the surface */ -VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfaceCapabilitiesKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilitiesKHR( VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) @@ -114,7 +71,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfaceCapabilitiesKHR( * at most pSurfaceFormatCount structures will be written. If pSurfaceFormatCount is smaller than the number of format pairs supported for the given surface, * VK_INCOMPLETE will be returned instead of VK_SUCCESS to indicate that not all the available values were returned. */ -VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfaceFormatsKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormatsKHR( VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, uint32_t* pSurfaceFormatCount, @@ -159,7 +116,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfaceFormatsKHR( * If pPresentModeCount is smaller than the number of presentation modes supported for the given surface, VK_INCOMPLETE will be returned instead of * VK_SUCCESS to indicate that not all the available values were returned. 
*/ -VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfacePresentModesKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR( VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, uint32_t* pPresentModeCount, @@ -199,7 +156,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfacePresentModesKHR( /* * https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSwapchainKHR */ -VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateSwapchainKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR( VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, @@ -447,7 +404,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkDestroySwapchainKHR( * https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceSupportKHR * does this queue family support presentation to this surface? */ -VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfaceSupportKHR( +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR( VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, VkSurfaceKHR surface, diff --git a/test/blending/blending.cpp b/test/blending/blending.cpp index fe27168..45d7e1f 100644 --- a/test/blending/blending.cpp +++ b/test/blending/blending.cpp @@ -253,28 +253,14 @@ void createInstance() { } } -extern "C" { -typedef VkResult (*PFN_vkCreateRpiSurfaceEXT)( - VkPhysicalDevice physicalDevice, - const VkRpiSurfaceCreateInfoEXT* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSurfaceKHR* pSurface); -typedef VkResult (*PFN_vkGetRpiExtensionPointerEXT)( - VkPhysicalDevice physicalDevice - ); -} - -PFN_vkGetRpiExtensionPointerEXT vkGetRpiExtensionPointerEXT = 0; - void createWindowSurface() { - vkGetRpiExtensionPointerEXT = (PFN_vkGetRpiExtensionPointerEXT)vkGetInstanceProcAddr(instance, "vkGetRpiExtensionPointerEXT"); - fprintf(stderr, "%p\n", vkGetRpiExtensionPointerEXT); - PFN_vkCreateRpiSurfaceEXT 
vkCreateRpiSurfaceEXT = 0; - vkCreateRpiSurfaceEXT = (PFN_vkCreateRpiSurfaceEXT)vkGetRpiExtensionPointerEXT((VkPhysicalDevice)"vkCreateRpiSurfaceEXT"); - fprintf(stderr, "%p\n", vkCreateRpiSurfaceEXT); + vkCreateRpiSurfaceEXT = (PFN_vkCreateRpiSurfaceEXT)vkGetInstanceProcAddr(instance, "vkCreateRpiSurfaceEXT"); - if (vkCreateRpiSurfaceEXT(physicalDevice, 0, 0, &windowSurface) != VK_SUCCESS) { + VkRpiPhysicalDevice* ptr = (VkRpiPhysicalDevice*)physicalDevice; + ptr->customData = &windowSurface; + + if (vkCreateRpiSurfaceEXT(physicalDevice) != VK_SUCCESS) { std::cerr << "failed to create window surface!" << std::endl; assert(0); }