1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2024-11-28 10:24:15 +01:00
This commit is contained in:
Unknown 2019-09-30 21:30:37 +01:00
parent 40c24dce9c
commit 2d8132011e
9 changed files with 186 additions and 210 deletions

View File

@ -83,6 +83,10 @@ typedef struct VkInstance_T _instance;
typedef struct VkPhysicalDevice_T
{
VK_LOADER_DATA loaderData;
//apparently unknown physical device extensions can't quite pass anything other than VkPhysicalDevice
//now that object has to have the loader magic
//so we just provide a custom data pointer so that our extensions can be used...
void* customData;
//hardware id?
char* path;
_instance* instance;

View File

@ -948,11 +948,6 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkGetDescriptorSetLayoutSupport(
const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
VkDescriptorSetLayoutSupport* pSupport);
//Declaration of the driver's rpi_ prefixed vkDestroySurfaceKHR entry point.
//Takes the instance, the surface handle and the (optional) allocation callbacks; returns nothing.
VKAPI_ATTR void VKAPI_CALL rpi_vkDestroySurfaceKHR(
VkInstance instance,
VkSurfaceKHR surface,
const VkAllocationCallbacks* pAllocator);
#ifdef __cplusplus
}
#endif

View File

@ -29,18 +29,21 @@ VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance in
void* ptr = rpi_vkGetInstanceProcAddr(instance, pName);
fprintf(stderr, "-----------------rpi_vkGetInstanceProcAddr: %s, %p\n", pName, ptr);
return ptr;
}
/*
 * Looks up a physical device level entry point of the driver by name.
 *
 * instance - not used by the lookup itself
 * pName    - name of the requested function
 *
 * Returns the address of the matching RPI specific "extension" function,
 * or 0 if pName is not one of the names handled here.
 */
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(VkInstance instance,
                                                                        const char* pName)
{
    fprintf(stderr, "-----------------vk_icdGetPhysicalDeviceProcAddr: %s\n", pName);

    void* ptr = 0;

    //NOTE(review): RETFUNC is defined outside of this view, presumably it returns
    //early when pName matches the given function name -- confirm
    RETFUNC(vkGetRpiExtensionPointerEXT);

    if(!strcmp(pName, "vkCreateRpiSurfaceEXT"))
    {
        ptr = &rpi_vkCreateRpiSurfaceEXT;
    }
    else if(!strcmp(pName, "vkCreateShaderModuleFromRpiAssemblyEXT"))
    {
        ptr = &rpi_vkCreateShaderModuleFromRpiAssemblyEXT;
    }

    //hand back whatever was found; returning 0 unconditionally here would hide
    //both extension functions from the caller
    return ptr;
}
/*

View File

@ -6,116 +6,6 @@
#include "vkExt.h"
//TODO collect shader performance data
//eg number of texture samples etc.
//TODO check if shader has flow control and make sure instance also has flow control
//TODO make sure instance has threaded fs if shader contains thread switch
/*
 * Creates a shader module directly from QPU assembly text (RPI specific "extension").
 *
 * device        - has to be non-null (asserted), not used otherwise
 * pCreateInfo   - asmStrings holds one (optional) assembly string per assembly type,
 *                 mappings / numMappings are copied into the new module
 * pAllocator    - not referenced directly in this function
 *                 NOTE(review): ALLOCATE / FREE are defined elsewhere and may consume it -- confirm
 * pShaderModule - receives the new module on success, left untouched on failure
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if any allocation fails,
 * in which case everything allocated up to that point is released again.
 */
VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device,
                                                    VkRpiShaderModuleAssemblyCreateInfoEXT* pCreateInfo,
                                                    const VkAllocationCallbacks* pAllocator,
                                                    VkShaderModule* pShaderModule)
{
    assert(device);
    assert(pCreateInfo);
    assert(pShaderModule);
    assert(pCreateInfo->asmStrings);

    _shaderModule* shader = ALLOCATE(sizeof(_shaderModule), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

    if(!shader)
    {
        return VK_ERROR_OUT_OF_HOST_MEMORY;
    }

    //start from a known state: the failure path relies on null instruction pointers
    //and no field stays uninitialised when there is no assembly / no mapping
    shader->hasThreadSwitch = 0;
    shader->numVaryings = 0;
    shader->numMappings = 0;
    shader->mappings = 0;

    for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
    {
        shader->instructions[c] = 0;
        shader->bos[c] = 0;
        shader->sizes[c] = 0;
    }

    VkResult result = VK_SUCCESS;

    for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
    {
        if(!pCreateInfo->asmStrings[c])
        {
            continue;
        }

        uint32_t numInstructions = get_num_instructions(pCreateInfo->asmStrings[c]);
        uint32_t size = sizeof(uint64_t)*numInstructions;

        //TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
        //why not map kernel space mem to user space instead?
        shader->instructions[c] = ALLOCATE(size, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

        if(!shader->instructions[c])
        {
            result = VK_ERROR_OUT_OF_HOST_MEMORY;
            break;
        }

        //need to create a temporary copy as the assembly algorithm is destructive
        size_t stringLength = strlen(pCreateInfo->asmStrings[c]);
        char* tmpShaderStr = ALLOCATE(stringLength+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

        if(!tmpShaderStr)
        {
            result = VK_ERROR_OUT_OF_HOST_MEMORY;
            break;
        }

        memcpy(tmpShaderStr, pCreateInfo->asmStrings[c], stringLength+1);

        assemble_qpu_asm(tmpShaderStr, shader->instructions[c]);

        FREE(tmpShaderStr);

        //the count is restarted for every assembly type, so the last assembled
        //program determines the stored value
        //NOTE(review): confirm that this (and not a sum over all types) is intended
        shader->numVaryings = 0;

        for(uint32_t d = 0; d < numInstructions; ++d)
        {
            //shift first and mask afterwards, this avoids shifting a signed
            //constant into the sign bit
            uint64_t instruction = shader->instructions[c][d];
            unsigned sig_bits = (unsigned)((instruction >> 60) & 0xfull);
            unsigned is_sem = ((instruction >> 57) & 0x7full) == 0x74;

            //signal value 2 is what marks a thread switch for this module
            if(sig_bits == 2)
            {
                shader->hasThreadSwitch = 1;
            }

            //if it's an ALU instruction
            if(!is_sem && sig_bits != 14 && sig_bits != 15)
            {
                unsigned raddr_a = (unsigned)((instruction >> 18) & 0x3full);
                unsigned raddr_b = (unsigned)((instruction >> 12) & 0x3full);

                //every read from address 35 is counted as one varying
                if(raddr_a == 35)
                {
                    shader->numVaryings++;
                }

                //don't count small immediates
                if(sig_bits != 13 && raddr_b == 35)
                {
                    shader->numVaryings++;
                }
            }
        }

        shader->sizes[c] = size;
    }

    if(result == VK_SUCCESS && pCreateInfo->numMappings > 0)
    {
        shader->mappings = ALLOCATE(sizeof(VkRpiAssemblyMappingEXT)*pCreateInfo->numMappings, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

        if(!shader->mappings)
        {
            result = VK_ERROR_OUT_OF_HOST_MEMORY;
        }
        else
        {
            memcpy(shader->mappings, pCreateInfo->mappings, sizeof(VkRpiAssemblyMappingEXT)*pCreateInfo->numMappings);
            shader->numMappings = pCreateInfo->numMappings;
        }
    }

    if(result != VK_SUCCESS)
    {
        //undo the partial construction so that nothing leaks
        for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
        {
            if(shader->instructions[c])
            {
                FREE(shader->instructions[c]);
            }
        }

        FREE(shader);

        return result;
    }

    *pShaderModule = shader;

    return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateShaderModule
*/

View File

@ -1,5 +1,22 @@
#pragma once
#include <vulkan/vulkan.h>
#ifdef __cplusplus
extern "C" {
#endif
//Function pointer type for the RPI specific surface creation "extension" function.
//Only the physical device is passed in and a VkResult comes back;
//any further data has to travel through the physical device object itself.
typedef VkResult (*PFN_vkCreateRpiSurfaceEXT)(
VkPhysicalDevice physicalDevice);
//Function pointer type meant for the "create shader module from QPU assembly" extension function.
//NOTE(review): the name carries a double underscore (PFN__) and the signature only takes the
//physical device, while rpi_vkCreateShaderModuleFromRpiAssemblyEXT is declared with four
//parameters -- confirm which one is intended before calling through this type
typedef VkResult (*PFN__vkCreateShaderModuleFromRpiAssemblyEXT)(
VkPhysicalDevice physicalDevice);
//Application side view of the start of the driver's physical device object.
//A VkPhysicalDevice handle is cast to this type to reach the customData pointer,
//which is how extra data is handed to the RPI specific extension functions.
typedef struct VkRpiPhysicalDevice
{
//placeholder for the first member of the driver's object (its loader data)
//NOTE(review): assumes that member is exactly one pointer wide -- confirm
void* dummy;
//user supplied pointer, read by the extension functions
void* customData;
} VkRpiPhysicalDevice;
//we need something like the other platforms to create surfaces on the RPI
//so I created this little "extension"
//full spec in this file ;)
@ -67,3 +84,8 @@ typedef struct VkRpiShaderModuleAssemblyCreateInfoEXT {
VkRpiAssemblyMappingEXT* mappings;
uint32_t numMappings;
} VkRpiShaderModuleAssemblyCreateInfoEXT;
#ifdef __cplusplus
}
#endif

140
driver/vkExtFunctions.c Normal file
View File

@ -0,0 +1,140 @@
#pragma once
#include "common.h"
#include "QPUassembler/qpu_assembler.h"
#include "modeset.h"
#include "vkExtFunctions.h"
#ifdef __cplusplus
extern "C" {
#endif
//TODO collect shader performance data
//eg number of texture samples etc.
//TODO check if shader has flow control and make sure instance also has flow control
//TODO make sure instance has threaded fs if shader contains thread switch
/*
 * Creates a shader module directly from QPU assembly text (RPI specific "extension").
 *
 * physicalDevice - has to be non-null (asserted), not used otherwise
 * pCreateInfo    - asmStrings holds one (optional) assembly string per assembly type,
 *                  mappings / numMappings are copied into the new module
 * pAllocator     - not referenced directly in this function
 *                  NOTE(review): ALLOCATE / FREE are defined elsewhere and may consume it -- confirm
 * pShaderModule  - receives the new module on success, left untouched on failure
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if any allocation fails,
 * in which case everything allocated up to that point is released again.
 */
VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT(VkPhysicalDevice physicalDevice,
                                                    VkRpiShaderModuleAssemblyCreateInfoEXT* pCreateInfo,
                                                    const VkAllocationCallbacks* pAllocator,
                                                    VkShaderModule* pShaderModule)
{
    assert(physicalDevice);
    assert(pCreateInfo);
    assert(pShaderModule);
    assert(pCreateInfo->asmStrings);

    _shaderModule* shader = ALLOCATE(sizeof(_shaderModule), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

    if(!shader)
    {
        return VK_ERROR_OUT_OF_HOST_MEMORY;
    }

    //start from a known state: the failure path relies on null instruction pointers
    //and no field stays uninitialised when there is no assembly / no mapping
    shader->hasThreadSwitch = 0;
    shader->numVaryings = 0;
    shader->numMappings = 0;
    shader->mappings = 0;

    for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
    {
        shader->instructions[c] = 0;
        shader->bos[c] = 0;
        shader->sizes[c] = 0;
    }

    VkResult result = VK_SUCCESS;

    for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
    {
        if(!pCreateInfo->asmStrings[c])
        {
            continue;
        }

        uint32_t numInstructions = get_num_instructions(pCreateInfo->asmStrings[c]);
        uint32_t size = sizeof(uint64_t)*numInstructions;

        //TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
        //why not map kernel space mem to user space instead?
        shader->instructions[c] = ALLOCATE(size, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

        if(!shader->instructions[c])
        {
            result = VK_ERROR_OUT_OF_HOST_MEMORY;
            break;
        }

        //need to create a temporary copy as the assembly algorithm is destructive
        size_t stringLength = strlen(pCreateInfo->asmStrings[c]);
        char* tmpShaderStr = ALLOCATE(stringLength+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

        if(!tmpShaderStr)
        {
            result = VK_ERROR_OUT_OF_HOST_MEMORY;
            break;
        }

        memcpy(tmpShaderStr, pCreateInfo->asmStrings[c], stringLength+1);

        assemble_qpu_asm(tmpShaderStr, shader->instructions[c]);

        FREE(tmpShaderStr);

        //the count is restarted for every assembly type, so the last assembled
        //program determines the stored value
        //NOTE(review): confirm that this (and not a sum over all types) is intended
        shader->numVaryings = 0;

        for(uint32_t d = 0; d < numInstructions; ++d)
        {
            //shift first and mask afterwards, this avoids shifting a signed
            //constant into the sign bit
            uint64_t instruction = shader->instructions[c][d];
            unsigned sig_bits = (unsigned)((instruction >> 60) & 0xfull);
            unsigned is_sem = ((instruction >> 57) & 0x7full) == 0x74;

            //signal value 2 is what marks a thread switch for this module
            if(sig_bits == 2)
            {
                shader->hasThreadSwitch = 1;
            }

            //if it's an ALU instruction
            if(!is_sem && sig_bits != 14 && sig_bits != 15)
            {
                unsigned raddr_a = (unsigned)((instruction >> 18) & 0x3full);
                unsigned raddr_b = (unsigned)((instruction >> 12) & 0x3full);

                //every read from address 35 is counted as one varying
                if(raddr_a == 35)
                {
                    shader->numVaryings++;
                }

                //don't count small immediates
                if(sig_bits != 13 && raddr_b == 35)
                {
                    shader->numVaryings++;
                }
            }
        }

        shader->sizes[c] = size;
    }

    if(result == VK_SUCCESS && pCreateInfo->numMappings > 0)
    {
        shader->mappings = ALLOCATE(sizeof(VkRpiAssemblyMappingEXT)*pCreateInfo->numMappings, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

        if(!shader->mappings)
        {
            result = VK_ERROR_OUT_OF_HOST_MEMORY;
        }
        else
        {
            memcpy(shader->mappings, pCreateInfo->mappings, sizeof(VkRpiAssemblyMappingEXT)*pCreateInfo->numMappings);
            shader->numMappings = pCreateInfo->numMappings;
        }
    }

    if(result != VK_SUCCESS)
    {
        //undo the partial construction so that nothing leaks
        for(int c = 0; c < RPI_ASSEMBLY_TYPE_MAX; ++c)
        {
            if(shader->instructions[c])
            {
                FREE(shader->instructions[c]);
            }
        }

        FREE(shader);

        return result;
    }

    *pShaderModule = shader;

    return VK_SUCCESS;
}
/*
* Implementation of our RPI specific "extension"
*/
/*
 * RPI specific surface creation.
 *
 * Creates a surface through modeset_create(controlFd) and writes the handle to the
 * VkSurfaceKHR that the physical device's customData pointer refers to, so the
 * caller has to point customData at a VkSurfaceKHR before calling.
 *
 * Returns VK_SUCCESS, or VK_ERROR_INITIALIZATION_FAILED if customData was not set.
 */
VkResult rpi_vkCreateRpiSurfaceEXT(
        VkPhysicalDevice physicalDevice)
{
    assert(physicalDevice);

    //TODO use allocator!

    _physicalDevice* ptr = physicalDevice;

    //customData is the only way the output location reaches us,
    //refuse to write through it if the caller never set it
    if(!ptr->customData)
    {
        return VK_ERROR_INITIALIZATION_FAILED;
    }

    *(VkSurfaceKHR*)ptr->customData = (VkSurfaceKHR)modeset_create(controlFd);

    return VK_SUCCESS;
}
#ifdef __cplusplus
}
#endif

View File

@ -9,37 +9,16 @@ extern "C" {
//extension name something like: VK_KHR_rpi_surface
//extension that allows developers to create a surface to render to on Raspbian Stretch Lite
//Takes only the physical device; the VkSurfaceKHR to fill in is passed
//through the physical device's customData pointer.
extern VkResult rpi_vkCreateRpiSurfaceEXT(
        VkPhysicalDevice physicalDevice);
//extension that allows developers to submit QPU assembly directly and thus hand optimise code
//Builds a shader module from the assembly strings in pCreateInfo and returns it in pShaderModule.
//The first parameter is the physical device, matching the definition in vkExtFunctions.c.
extern VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT(
        VkPhysicalDevice physicalDevice,
        VkRpiShaderModuleAssemblyCreateInfoEXT* pCreateInfo,
        const VkAllocationCallbacks* pAllocator,
        VkShaderModule* pShaderModule
        );
extern void* _getFuncPtr(const char* name);
//Stub for handing out pointers to our "custom" extension functions.
//The actual lookup is disabled at the moment, so the result is always VK_SUCCESS.
static VkResult rpi_vkGetRpiExtensionPointerEXT(
        VkPhysicalDevice physicalDevice
        )
{
    //TODO how do we handle our "custom" extensions towards the loader????
    //disabled lookup: (uint32_t)_getFuncPtr((const char*)physicalDevice)
    const uint32_t lookupResult = 0;

    return lookupResult ? (VkResult)lookupResult : VK_SUCCESS;
}
//TODO performance counters / perfmon

View File

@ -5,56 +5,13 @@
#include "declarations.h"
#include "vkExtFunctions.h"
#ifdef __cplusplus
extern "C" {
#endif
//Maps the name of an RPI specific extension function to its implementation.
//Returns 0 for any name that is not known here.
void* _getFuncPtr(const char* name)
{
    void* entryPoint = 0;

    if(strcmp(name, "vkCreateRpiSurfaceEXT") == 0)
    {
        entryPoint = (void*)&rpi_vkCreateRpiSurfaceEXT;
    }
    else if(strcmp(name, "vkCreateShaderModuleFromRpiAssemblyEXT") == 0)
    {
        entryPoint = (void*)&rpi_vkCreateShaderModuleFromRpiAssemblyEXT;
    }

    return entryPoint;
}
#ifdef __cplusplus
}
#endif
/*
* Implementation of our RPI specific "extension"
*/
//Creates a surface via modeset_create(controlFd) and returns it through pSurface.
//instance and pSurface have to be non-null, pCreateInfo and pAllocator are not looked at.
//Always returns VK_SUCCESS.
VkResult rpi_vkCreateRpiSurfaceEXT(
        VkInstance instance,
        const VkRpiSurfaceCreateInfoEXT* pCreateInfo,
        const VkAllocationCallbacks* pAllocator,
        VkSurfaceKHR* pSurface)
{
    assert(instance);
    assert(pSurface);
    //pCreateInfo is deliberately not asserted, it is ignored for now

    //TODO use allocator!
    VkSurfaceKHR createdSurface = (VkSurfaceKHR)modeset_create(controlFd);
    *pSurface = createdSurface;

    return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySurfaceKHR
* Destroying a VkSurfaceKHR merely severs the connection between Vulkan and the native surface,
* and does not imply destroying the native surface, closing a window, or similar behavior
* (but we'll do so anyways...)
*/
VKAPI_ATTR void VKAPI_CALL rpi_vkDestroySurfaceKHR(
VKAPI_ATTR void VKAPI_CALL vkDestroySurfaceKHR(
VkInstance instance,
VkSurfaceKHR surface,
const VkAllocationCallbacks* pAllocator)
@ -79,7 +36,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkDestroySurfaceKHR(
*
* capabilities the specified device supports for a swapchain created for the surface
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfaceCapabilitiesKHR(
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilitiesKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities)
@ -114,7 +71,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfaceCapabilitiesKHR(
* at most pSurfaceFormatCount structures will be written. If pSurfaceFormatCount is smaller than the number of format pairs supported for the given surface,
* VK_INCOMPLETE will be returned instead of VK_SUCCESS to indicate that not all the available values were returned.
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfaceFormatsKHR(
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormatsKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
uint32_t* pSurfaceFormatCount,
@ -159,7 +116,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfaceFormatsKHR(
* If pPresentModeCount is smaller than the number of presentation modes supported for the given surface, VK_INCOMPLETE will be returned instead of
* VK_SUCCESS to indicate that not all the available values were returned.
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfacePresentModesKHR(
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
uint32_t* pPresentModeCount,
@ -199,7 +156,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfacePresentModesKHR(
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSwapchainKHR
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateSwapchainKHR(
VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR(
VkDevice device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
@ -447,7 +404,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkDestroySwapchainKHR(
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceSupportKHR
* does this queue family support presentation to this surface?
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetPhysicalDeviceSurfaceSupportKHR(
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR(
VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
VkSurfaceKHR surface,

View File

@ -253,28 +253,14 @@ void createInstance() {
}
}
//Function pointer types for the RPI specific "extension" functions, declared with C linkage.
extern "C" {
//surface creation: physical device, create info, allocation callbacks and the surface to fill in
typedef VkResult (*PFN_vkCreateRpiSurfaceEXT)(
VkPhysicalDevice physicalDevice,
const VkRpiSurfaceCreateInfoEXT* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSurfaceKHR* pSurface);
//extension pointer lookup: the single VkPhysicalDevice parameter is used to carry the
//function name string and the VkResult return value is cast to the function pointer
typedef VkResult (*PFN_vkGetRpiExtensionPointerEXT)(
VkPhysicalDevice physicalDevice
);
}
PFN_vkGetRpiExtensionPointerEXT vkGetRpiExtensionPointerEXT = 0;
void createWindowSurface() {
vkGetRpiExtensionPointerEXT = (PFN_vkGetRpiExtensionPointerEXT)vkGetInstanceProcAddr(instance, "vkGetRpiExtensionPointerEXT");
fprintf(stderr, "%p\n", vkGetRpiExtensionPointerEXT);
PFN_vkCreateRpiSurfaceEXT vkCreateRpiSurfaceEXT = 0;
vkCreateRpiSurfaceEXT = (PFN_vkCreateRpiSurfaceEXT)vkGetRpiExtensionPointerEXT((VkPhysicalDevice)"vkCreateRpiSurfaceEXT");
fprintf(stderr, "%p\n", vkCreateRpiSurfaceEXT);
vkCreateRpiSurfaceEXT = (PFN_vkCreateRpiSurfaceEXT)vkGetInstanceProcAddr(instance, "vkCreateRpiSurfaceEXT");
if (vkCreateRpiSurfaceEXT(physicalDevice, 0, 0, &windowSurface) != VK_SUCCESS) {
VkRpiPhysicalDevice* ptr = (VkRpiPhysicalDevice*)physicalDevice;
ptr->customData = &windowSurface;
if (vkCreateRpiSurfaceEXT(physicalDevice) != VK_SUCCESS) {
std::cerr << "failed to create window surface!" << std::endl;
assert(0);
}