1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2024-12-12 00:08:54 +01:00
rpi-vk-driver/driver/sync.c

550 lines
18 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "common.h"
#include "kernel/vc4_packet.h"
//-----------------------------
//Semaphore vs Fence:
// Semaphore is GPU to GPU sync
// Fence is GPU to CPU sync
// Both are signalled by the GPU
// Both are multi-queue
// But Fence can be waited on by the CPU
// Semaphore can only be waited on by the GPU
//
//Events are general can be signalled by the CPU or the GPU
// But can only be waited on by the GPU
// Limited to a single queue
//
//TODO as a result the current semaphore
//implementation is wrong
//maybe use:
//clInsertWaitOnSemaphore
//clInsertIncrementSemaphore
//
//seems like each binCL needs to end with increment semaphore
//signalling that binning is done
//and each renderCL starts with a wait semaphore (to wait for binning)
//
//in theory we could add a wait for semaphore to the start of a binCL
//and an increment semaphore to either to the end of another binCL or renderCL
//but we can't control renderCLs as the kernel side creates those...
//
//also there's only one of this semaphore, and in Vulkan you can have many
//and should only signal those selected
//so maybe we could emulate this in shaders?
//ie. stall shader until a value is something?
//and increment said value?
//but we'd need to patch shaders and it'd probably be slow...
//
//Apparently the RPi contains 16 4bit semaphores that are accessible for each QPU via SFU
//-----------------------------
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSemaphore
* Semaphores are a synchronization primitive that can be used to insert a dependency between batches submitted to queues.
* Semaphores have two states - signaled and unsignaled. The state of a semaphore can be signaled after execution of a batch of commands is completed.
* A batch can wait for a semaphore to become signaled before it begins execution, and the semaphore is also unsignaled before the batch begins execution.
* As with most objects in Vulkan, semaphores are an interface to internal data which is typically opaque to applications.
* This internal data is referred to as a semaphores payload. However, in order to enable communication with agents outside of the current device,
* it is necessary to be able to export that payload to a commonly understood format, and subsequently import from that format as well.
* The internal data of a semaphore may include a reference to any resources and pending work associated with signal or unsignal operations performed on that semaphore object.
* Mechanisms to import and export that internal data to and from semaphores are provided below.
* These mechanisms indirectly enable applications to share semaphore state between two or more semaphores and other synchronization primitives across process and API boundaries.
* When created, the semaphore is in the unsignaled state.
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateSemaphore(
VkDevice device,
const VkSemaphoreCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSemaphore* pSemaphore)
{
PROFILESTART(rpi_vkCreateSemaphore);
assert(device);
assert(pSemaphore);
//we'll probably just use an IOCTL to wait for a GPU sequence number to complete.
sem_t* s = ALLOCATE(sizeof(sem_t), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!s)
{
PROFILEEND(rpi_vkCreateSemaphore);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
sem_init(s, 0, 0); //create semaphore unsignalled, shared between threads
*pSemaphore = (VkSemaphore)s;
PROFILEEND(rpi_vkCreateSemaphore);
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdPipelineBarrier
* vkCmdPipelineBarrier is a synchronization command that inserts a dependency between commands submitted to the same queue, or between commands in the same subpass.
* When vkCmdPipelineBarrier is submitted to a queue, it defines a memory dependency between commands that were submitted before it, and those submitted after it.
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the first synchronization scope includes all commands that occur earlier in submission order.
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the first synchronization scope includes only commands that occur earlier in submission order within the same subpass.
* In either case, the first synchronization scope is limited to operations on the pipeline stages determined by the source stage mask specified by srcStageMask.
*
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the second synchronization scope includes all commands that occur later in submission order.
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the second synchronization scope includes only commands that occur later in submission order within the same subpass.
* In either case, the second synchronization scope is limited to operations on the pipeline stages determined by the destination stage mask specified by dstStageMask.
*
* The first access scope is limited to access in the pipeline stages determined by the source stage mask specified by srcStageMask.
* Within that, the first access scope only includes the first access scopes defined by elements of the pMemoryBarriers,
* pBufferMemoryBarriers and pImageMemoryBarriers arrays, which each define a set of memory barriers. If no memory barriers are specified,
* then the first access scope includes no accesses.
*
* The second access scope is limited to access in the pipeline stages determined by the destination stage mask specified by dstStageMask.
* Within that, the second access scope only includes the second access scopes defined by elements of the pMemoryBarriers, pBufferMemoryBarriers and pImageMemoryBarriers arrays,
* which each define a set of memory barriers. If no memory barriers are specified, then the second access scope includes no accesses.
*
* If dependencyFlags includes VK_DEPENDENCY_BY_REGION_BIT, then any dependency between framebuffer-space pipeline stages is framebuffer-local - otherwise it is framebuffer-global.
*/
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdPipelineBarrier(
VkCommandBuffer commandBuffer,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
VkDependencyFlags dependencyFlags,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
{
PROFILESTART(rpi_vkCmdPipelineBarrier);
assert(commandBuffer);
//TODO pipeline stage flags
//VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
//VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
//VK_PIPELINE_STAGE_VERTEX_INPUT_BIT
//VK_PIPELINE_STAGE_VERTEX_SHADER_BIT
//VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
//VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT
//VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT
//VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
//VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
//VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
//VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
//VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
//VK_PIPELINE_STAGE_TRANSFER_BIT
//VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
//VK_PIPELINE_STAGE_HOST_BIT
//VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT
//VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
//TODO dependency flags
//VK_DEPENDENCY_BY_REGION_BIT,
//VK_DEPENDENCY_DEVICE_GROUP_BIT,
//VK_DEPENDENCY_VIEW_LOCAL_BIT
//TODO access flags
//VK_ACCESS_INDIRECT_COMMAND_READ_BIT
//VK_ACCESS_INDEX_READ_BIT
//VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
//VK_ACCESS_UNIFORM_READ_BIT
//VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
//VK_ACCESS_SHADER_READ_BIT
//VK_ACCESS_SHADER_WRITE_BIT
//VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
//VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
//VK_ACCESS_TRANSFER_READ_BIT
//VK_ACCESS_TRANSFER_WRITE_BIT
//VK_ACCESS_HOST_READ_BIT
//VK_ACCESS_HOST_WRITE_BIT
//VK_ACCESS_MEMORY_READ_BIT
//VK_ACCESS_MEMORY_WRITE_BIT
//VK_ACCESS_COMMAND_PROCESS_READ_BIT_NVX
//VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX
//TODO Layout transition flags
//VK_IMAGE_LAYOUT_UNDEFINED
//VK_IMAGE_LAYOUT_GENERAL
//VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
//VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
//VK_IMAGE_LAYOUT_PREINITIALIZED
//VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
//VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR
for(int c = 0; c < memoryBarrierCount; ++c)
{
//TODO
}
for(int c = 0; c < bufferMemoryBarrierCount; ++c)
{
//TODO
}
for(int c = 0; c < imageMemoryBarrierCount; ++c)
{
_image* i = pImageMemoryBarriers[c].image;
if(srcStageMask & VK_PIPELINE_STAGE_TRANSFER_BIT &&
pImageMemoryBarriers[c].srcAccessMask & VK_ACCESS_TRANSFER_WRITE_BIT)
{
assert(i->layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
//transition to new layout
i->layout = pImageMemoryBarriers[c].newLayout;
}
PROFILEEND(rpi_vkCmdPipelineBarrier);
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDeviceWaitIdle
* vkDeviceWaitIdle is equivalent to calling vkQueueWaitIdle for all queues owned by device.
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkDeviceWaitIdle(
VkDevice device)
{
PROFILESTART(rpi_vkDeviceWaitIdle);
assert(device);
for(int c = 0; c < numQueueFamilies; ++c)
{
for(int d = 0; d < device->numQueues[c]; ++d)
{
uint64_t lastFinishedSeqno;
uint64_t timeout = WAIT_TIMEOUT_INFINITE;
vc4_seqno_wait(controlFd, &lastFinishedSeqno, device->queues[c][d].lastEmitSeqno, &timeout);
}
}
PROFILEEND(rpi_vkDeviceWaitIdle);
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkQueueWaitIdle
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueWaitIdle(
VkQueue queue)
{
PROFILESTART(rpi_vkQueueWaitIdle);
assert(queue);
_queue* q = queue;
uint64_t lastFinishedSeqno;
uint64_t timeout = WAIT_TIMEOUT_INFINITE;
vc4_seqno_wait(controlFd, &lastFinishedSeqno, q->lastEmitSeqno, &timeout);
PROFILEEND(rpi_vkQueueWaitIdle);
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySemaphore
*/
VKAPI_ATTR void VKAPI_CALL rpi_vkDestroySemaphore(
VkDevice device,
VkSemaphore semaphore,
const VkAllocationCallbacks* pAllocator)
{
PROFILESTART(rpi_vkDestroySemaphore);
assert(device);
if(semaphore)
{
sem_destroy((sem_t*)semaphore);
FREE(semaphore);
}
PROFILEEND(rpi_vkDestroySemaphore);
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateFence
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateFence(
VkDevice device,
const VkFenceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkFence* pFence)
{
PROFILESTART(rpi_vkCreateFence);
assert(device);
assert(pCreateInfo);
assert(pFence);
_fence* f = ALLOCATE(sizeof(_fence), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!f)
{
PROFILEEND(rpi_vkCreateFence);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
f->seqno = 0;
f->signaled = pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT;
*pFence = f;
PROFILEEND(rpi_vkCreateFence);
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyFence
*/
VKAPI_ATTR void VKAPI_CALL rpi_vkDestroyFence(
VkDevice device,
VkFence fence,
const VkAllocationCallbacks* pAllocator)
{
PROFILESTART(rpi_vkDestroyFence);
assert(device);
if(fence)
{
FREE(fence);
}
PROFILEEND(rpi_vkDestroyFence);
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetFenceStatus
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetFenceStatus(
VkDevice device,
VkFence fence)
{
PROFILESTART(rpi_vkGetFenceStatus);
assert(device);
assert(fence);
//TODO update fence status based on last completed seqno?
_fence* f = fence;
VkResult retval = f->signaled ? VK_SUCCESS : VK_NOT_READY;
PROFILEEND(rpi_vkGetFenceStatus);
return retval;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkResetFences
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkResetFences(
VkDevice device,
uint32_t fenceCount,
const VkFence* pFences)
{
PROFILESTART(rpi_vkResetFences);
assert(device);
assert(pFences);
assert(fenceCount > 0);
for(uint32_t c = 0; c < fenceCount; ++c)
{
_fence* f = pFences[c];
f->signaled = 0;
f->seqno = 0;
}
PROFILEEND(rpi_vkResetFences);
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkWaitForFences
*/
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkWaitForFences(
VkDevice device,
uint32_t fenceCount,
const VkFence* pFences,
VkBool32 waitAll,
uint64_t timeout)
{
PROFILESTART(rpi_vkWaitForFences);
assert(device);
assert(pFences);
assert(fenceCount > 0);
if(waitAll)
{
if(!timeout)
{
for(uint32_t c = 0; c < fenceCount; ++c)
{
_fence* f = pFences[c];
if(!f->signaled) //if any unsignaled
{
PROFILEEND(rpi_vkWaitForFences);
return VK_TIMEOUT;
}
PROFILEEND(rpi_vkWaitForFences);
return VK_SUCCESS;
}
}
//wait for all to be signaled
for(uint32_t c = 0; c < fenceCount; ++c)
{
_fence* f = pFences[c];
uint64_t lastFinishedSeqno = 0;
if(!f->signaled)
{
int ret = vc4_seqno_wait(controlFd, &lastFinishedSeqno, f->seqno, &timeout);
if(ret < 0)
{
PROFILEEND(rpi_vkWaitForFences);
return VK_TIMEOUT;
}
f->signaled = 1;
f->seqno = 0;
}
}
}
else
{
if(!timeout)
{
for(uint32_t c = 0; c < fenceCount; ++c)
{
_fence* f = pFences[c];
if(f->signaled) //if any signaled
{
PROFILEEND(rpi_vkWaitForFences);
return VK_SUCCESS;
}
PROFILEEND(rpi_vkWaitForFences);
return VK_TIMEOUT;
}
}
//wait for any to be signaled
for(uint32_t c = 0; c < fenceCount; ++c)
{
_fence* f = pFences[c];
uint64_t lastFinishedSeqno = 0;
if(!f->signaled)
{
int ret = vc4_seqno_wait(controlFd, &lastFinishedSeqno, f->seqno, &timeout);
if(ret < 0)
{
continue;
}
f->signaled = 1;
f->seqno = 0;
PROFILEEND(rpi_vkWaitForFences);
return VK_SUCCESS;
}
else
{
//already signaled
PROFILEEND(rpi_vkWaitForFences);
return VK_SUCCESS;
}
}
PROFILEEND(rpi_vkWaitForFences);
return VK_TIMEOUT;
}
PROFILEEND(rpi_vkWaitForFences);
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdWaitEvents(
VkCommandBuffer commandBuffer,
uint32_t eventCount,
const VkEvent* pEvents,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
{
UNSUPPORTED(vkCmdWaitEvents);
}
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetEventStatus(
VkDevice device,
VkEvent event)
{
UNSUPPORTED(vkGetEventStatus);
return UNSUPPORTED_RETURN;
}
VKAPI_ATTR void VKAPI_CALL rpi_vkDestroyEvent(
VkDevice device,
VkEvent event,
const VkAllocationCallbacks* pAllocator)
{
UNSUPPORTED(vkDestroyEvent);
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdResetEvent(
VkCommandBuffer commandBuffer,
VkEvent event,
VkPipelineStageFlags stageMask)
{
UNSUPPORTED(vkCmdResetEvent);
}
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateEvent(
VkDevice device,
const VkEventCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkEvent* pEvent)
{
UNSUPPORTED(vkCreateEvent);
return UNSUPPORTED_RETURN;
}
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkResetEvent(
VkDevice device,
VkEvent event)
{
UNSUPPORTED(vkResetEvent);
return UNSUPPORTED_RETURN;
}
VKAPI_ATTR VkResult VKAPI_CALL rpi_vkSetEvent(
VkDevice device,
VkEvent event)
{
UNSUPPORTED(vkSetEvent);
return UNSUPPORTED_RETURN;
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdSetEvent(
VkCommandBuffer commandBuffer,
VkEvent event,
VkPipelineStageFlags stageMask)
{
UNSUPPORTED(vkCmdSetEvent);
}