#include "common.h"
#include "kernel/vc4_packet.h"
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSemaphore
* Semaphores are a synchronization primitive that can be used to insert a dependency between batches submitted to queues.
* Semaphores have two states - signaled and unsignaled. The state of a semaphore can be signaled after execution of a batch of commands is completed.
* A batch can wait for a semaphore to become signaled before it begins execution, and the semaphore is also unsignaled before the batch begins execution.
* As with most objects in Vulkan, semaphores are an interface to internal data which is typically opaque to applications.
* This internal data is referred to as a semaphore's payload. However, in order to enable communication with agents outside of the current device,
* it is necessary to be able to export that payload to a commonly understood format, and subsequently import from that format as well.
* The internal data of a semaphore may include a reference to any resources and pending work associated with signal or unsignal operations performed on that semaphore object.
* Mechanisms to import and export that internal data to and from semaphores are provided below.
* These mechanisms indirectly enable applications to share semaphore state between two or more semaphores and other synchronization primitives across process and API boundaries.
* When created, the semaphore is in the unsignaled state.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore(
VkDevice device,
const VkSemaphoreCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSemaphore* pSemaphore)
{
assert(device);
assert(pSemaphore);
//we'll probably just use an IOCTL to wait for a GPU sequence number to complete.
sem_t* s = ALLOCATE(sizeof(sem_t), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!s)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
sem_init(s, 0, 0); //pshared = 0: shared between the threads of this process; initial value 0 = unsignaled
*pSemaphore = (VkSemaphore)s;
return VK_SUCCESS;
}
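/*
 * A minimal sketch (illustrative only, not called anywhere in this driver) of
 * how a submit path could drive the sem_t payload created above: sem_post()
 * signals the semaphore when a batch finishes, sem_wait() blocks a dependent
 * batch until it is signaled. The helper names are hypothetical.
 */
static inline void exampleSignalSemaphore(VkSemaphore semaphore)
{
	sem_post((sem_t*)semaphore); //signal: semaphore count 0 -> 1
}

static inline void exampleWaitSemaphore(VkSemaphore semaphore)
{
	sem_wait((sem_t*)semaphore); //block until signaled, then unsignal (1 -> 0)
}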
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdPipelineBarrier
* vkCmdPipelineBarrier is a synchronization command that inserts a dependency between commands submitted to the same queue, or between commands in the same subpass.
* When vkCmdPipelineBarrier is submitted to a queue, it defines a memory dependency between commands that were submitted before it, and those submitted after it.
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the first synchronization scope includes all commands that occur earlier in submission order.
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the first synchronization scope includes only commands that occur earlier in submission order within the same subpass.
* In either case, the first synchronization scope is limited to operations on the pipeline stages determined by the source stage mask specified by srcStageMask.
*
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the second synchronization scope includes all commands that occur later in submission order.
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the second synchronization scope includes only commands that occur later in submission order within the same subpass.
* In either case, the second synchronization scope is limited to operations on the pipeline stages determined by the destination stage mask specified by dstStageMask.
*
* The first access scope is limited to access in the pipeline stages determined by the source stage mask specified by srcStageMask.
* Within that, the first access scope only includes the first access scopes defined by elements of the pMemoryBarriers,
* pBufferMemoryBarriers and pImageMemoryBarriers arrays, which each define a set of memory barriers. If no memory barriers are specified,
* then the first access scope includes no accesses.
*
* The second access scope is limited to access in the pipeline stages determined by the destination stage mask specified by dstStageMask.
* Within that, the second access scope only includes the second access scopes defined by elements of the pMemoryBarriers, pBufferMemoryBarriers and pImageMemoryBarriers arrays,
* which each define a set of memory barriers. If no memory barriers are specified, then the second access scope includes no accesses.
*
* If dependencyFlags includes VK_DEPENDENCY_BY_REGION_BIT, then any dependency between framebuffer-space pipeline stages is framebuffer-local - otherwise it is framebuffer-global.
*/
VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier(
VkCommandBuffer commandBuffer,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
VkDependencyFlags dependencyFlags,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
{
assert(commandBuffer);
//TODO pipeline stage flags
//VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
//VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
//VK_PIPELINE_STAGE_VERTEX_INPUT_BIT
//VK_PIPELINE_STAGE_VERTEX_SHADER_BIT
//VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
//VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT
//VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT
//VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
//VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
//VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
//VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
//VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
//VK_PIPELINE_STAGE_TRANSFER_BIT
//VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
//VK_PIPELINE_STAGE_HOST_BIT
//VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT
//VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
//TODO dependency flags
//VK_DEPENDENCY_BY_REGION_BIT,
//VK_DEPENDENCY_DEVICE_GROUP_BIT,
//VK_DEPENDENCY_VIEW_LOCAL_BIT
//TODO access flags
//VK_ACCESS_INDIRECT_COMMAND_READ_BIT
//VK_ACCESS_INDEX_READ_BIT
//VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
//VK_ACCESS_UNIFORM_READ_BIT
//VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
//VK_ACCESS_SHADER_READ_BIT
//VK_ACCESS_SHADER_WRITE_BIT
//VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
//VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
//VK_ACCESS_TRANSFER_READ_BIT
//VK_ACCESS_TRANSFER_WRITE_BIT
//VK_ACCESS_HOST_READ_BIT
//VK_ACCESS_HOST_WRITE_BIT
//VK_ACCESS_MEMORY_READ_BIT
//VK_ACCESS_MEMORY_WRITE_BIT
//VK_ACCESS_COMMAND_PROCESS_READ_BIT_NVX
//VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX
//TODO Layout transition flags
//VK_IMAGE_LAYOUT_UNDEFINED
//VK_IMAGE_LAYOUT_GENERAL
//VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
//VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
//VK_IMAGE_LAYOUT_PREINITIALIZED
//VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
//VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR
for(uint32_t c = 0; c < memoryBarrierCount; ++c)
{
//TODO
}
for(uint32_t c = 0; c < bufferMemoryBarrierCount; ++c)
{
//TODO
}
for(uint32_t c = 0; c < imageMemoryBarrierCount; ++c)
{
_image* i = pImageMemoryBarriers[c].image;
//assert(i->layout == pImageMemoryBarriers[c].oldLayout || i->layout == VK_IMAGE_LAYOUT_UNDEFINED);
if(srcStageMask & VK_PIPELINE_STAGE_TRANSFER_BIT &&
pImageMemoryBarriers[c].srcAccessMask & VK_ACCESS_TRANSFER_WRITE_BIT &&
i->needToClear)
{
//insert control list records to clear the image
assert(i->layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length);
clInsertTileBinningModeConfiguration(&commandBuffer->binCl,
0, 0, 0, 0,
getFormatBpp(i->format) == 64, //64 bit color mode
i->samples > 1, //msaa
i->width, i->height, 0, 0, 0);
//START_TILE_BINNING resets the statechange counters in the hardware,
//which are what is used when a primitive is binned to a tile to
//figure out what new state packets need to be written to that tile's
//command list.
clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length);
clInsertStartTileBinning(&commandBuffer->binCl);
//Reset the current compressed primitives format. This gets modified
//by VC4_PACKET_GL_INDEXED_PRIMITIVE and
//VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
//of every tile.
clFit(commandBuffer, &commandBuffer->binCl, V3D21_PRIMITIVE_LIST_FORMAT_length);
clInsertPrimitiveListFormat(&commandBuffer->binCl,
1, //16 bit
2); //tris
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, i->boundMem->bo);
commandBuffer->submitCl.color_write.hindex = idx;
commandBuffer->submitCl.color_write.offset = 0;
commandBuffer->submitCl.color_write.flags = 0;
//TODO format
commandBuffer->submitCl.color_write.bits =
VC4_SET_FIELD(VC4_RENDER_CONFIG_FORMAT_RGBA8888, VC4_RENDER_CONFIG_FORMAT) |
VC4_SET_FIELD(i->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);
commandBuffer->submitCl.clear_color[0] = i->clearColor[0];
commandBuffer->submitCl.clear_color[1] = i->clearColor[1];
//TODO ranges
commandBuffer->submitCl.min_x_tile = 0;
commandBuffer->submitCl.min_y_tile = 0;
uint32_t tileSizeW = 64;
uint32_t tileSizeH = 64;
if(i->samples > 1)
{
tileSizeW >>= 1;
tileSizeH >>= 1;
}
if(getFormatBpp(i->format) == 64)
{
tileSizeH >>= 1;
}
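//e.g. a single sampled 32 bpp 1920x1080 image keeps the full 64x64 tile size:
//divRoundUp(1920, 64) = 30 and divRoundUp(1080, 64) = 17 tiles,
//so max_x_tile = 29 and max_y_tile = 16.
//a 4x MSAA 64 bpp image would use 32x16 tiles instead.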
uint32_t widthInTiles = divRoundUp(i->width, tileSizeW);
uint32_t heightInTiles = divRoundUp(i->height, tileSizeH);
commandBuffer->submitCl.max_x_tile = widthInTiles - 1;
commandBuffer->submitCl.max_y_tile = heightInTiles - 1;
commandBuffer->submitCl.width = i->width;
commandBuffer->submitCl.height = i->height;
commandBuffer->submitCl.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
commandBuffer->submitCl.clear_z = 0; //TODO
commandBuffer->submitCl.clear_s = 0;
}
//transition to new layout
i->layout = pImageMemoryBarriers[c].newLayout;
}
}
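/*
 * A minimal sketch (illustrative only, not called anywhere in this driver) of
 * the application-side barrier that exercises the clear path above: a
 * TRANSFER_WRITE barrier out of VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, e.g.
 * recorded after vkCmdClearColorImage, which is when needToClear makes
 * vkCmdPipelineBarrier emit the clearing control list.
 */
static inline void exampleClearToSampledBarrier(VkCommandBuffer cmdBuf, VkImage image)
{
	VkImageMemoryBarrier barrier = {
		.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
		.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
		.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
		.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
		.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
		.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
		.image = image,
		.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 },
	};
	vkCmdPipelineBarrier(cmdBuf,
		VK_PIPELINE_STAGE_TRANSFER_BIT, //srcStageMask, matches the check above
		VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, //dstStageMask
		0, //dependencyFlags
		0, NULL, //no global memory barriers
		0, NULL, //no buffer memory barriers
		1, &barrier);
}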
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDeviceWaitIdle
* vkDeviceWaitIdle is equivalent to calling vkQueueWaitIdle for all queues owned by device.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle(
VkDevice device)
{
assert(device);
for(int c = 0; c < numQueueFamilies; ++c)
{
for(int d = 0; d < device->numQueues[c]; ++d)
{
uint64_t lastFinishedSeqno;
uint64_t timeout = WAIT_TIMEOUT_INFINITE;
vc4_seqno_wait(controlFd, &lastFinishedSeqno, device->queues[c][d].lastEmitSeqno, &timeout);
}
}
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkQueueWaitIdle
*/
VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle(
VkQueue queue)
{
assert(queue);
_queue* q = queue;
uint64_t lastFinishedSeqno;
uint64_t timeout = WAIT_TIMEOUT_INFINITE;
vc4_seqno_wait(controlFd, &lastFinishedSeqno, q->lastEmitSeqno, &timeout);
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySemaphore
*/
VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore(
VkDevice device,
VkSemaphore semaphore,
const VkAllocationCallbacks* pAllocator)
{
assert(device);
if(semaphore)
{
sem_destroy((sem_t*)semaphore);
FREE(semaphore);
}
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateFence
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateFence(
VkDevice device,
const VkFenceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkFence* pFence)
{
assert(device);
assert(pCreateInfo);
assert(pFence);
_fence* f = ALLOCATE(sizeof(_fence), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!f)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
f->seqno = 0;
f->signaled = pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT;
*pFence = f;
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyFence
*/
VKAPI_ATTR void VKAPI_CALL vkDestroyFence(
VkDevice device,
VkFence fence,
const VkAllocationCallbacks* pAllocator)
{
assert(device);
if(fence)
{
FREE(fence);
}
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetFenceStatus
*/
VKAPI_ATTR VkResult VKAPI_CALL vkGetFenceStatus(
VkDevice device,
VkFence fence)
{
assert(device);
assert(fence);
//TODO update fence status based on last completed seqno?
_fence* f = fence;
return f->signaled ? VK_SUCCESS : VK_NOT_READY;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkResetFences
*/
VKAPI_ATTR VkResult VKAPI_CALL vkResetFences(
VkDevice device,
uint32_t fenceCount,
const VkFence* pFences)
{
assert(device);
assert(pFences);
assert(fenceCount > 0);
for(uint32_t c = 0; c < fenceCount; ++c)
{
_fence* f = pFences[c];
f->signaled = 0;
f->seqno = 0;
}
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkWaitForFences
*/
VKAPI_ATTR VkResult VKAPI_CALL vkWaitForFences(
VkDevice device,
uint32_t fenceCount,
const VkFence* pFences,
VkBool32 waitAll,
uint64_t timeout)
{
assert(device);
assert(pFences);
assert(fenceCount > 0);
if(waitAll)
{
if(!timeout)
{
for(uint32_t c = 0; c < fenceCount; ++c)
{
_fence* f = pFences[c];
if(!f->signaled) //if any unsignaled
{
return VK_TIMEOUT;
}
}
return VK_SUCCESS; //zero timeout: all fences are already signaled
}
//wait for all to be signaled
for(uint32_t c = 0; c < fenceCount; ++c)
{
_fence* f = pFences[c];
uint64_t lastFinishedSeqno = 0;
if(!f->signaled)
{
int ret = vc4_seqno_wait(controlFd, &lastFinishedSeqno, f->seqno, &timeout);
if(ret < 0)
{
return VK_TIMEOUT;
}
f->signaled = 1;
f->seqno = 0;
}
}
}
else
{
if(!timeout)
{
for(uint32_t c = 0; c < fenceCount; ++c)
{
_fence* f = pFences[c];
if(f->signaled) //if any signaled
{
return VK_SUCCESS;
}
}
return VK_TIMEOUT; //zero timeout: none of the fences are signaled
}
//wait for any to be signaled
for(uint32_t c = 0; c < fenceCount; ++c)
{
_fence* f = pFences[c];
uint64_t lastFinishedSeqno = 0;
if(!f->signaled)
{
int ret = vc4_seqno_wait(controlFd, &lastFinishedSeqno, f->seqno, &timeout);
if(ret < 0)
{
continue;
}
f->signaled = 1;
f->seqno = 0;
return VK_SUCCESS;
}
}
return VK_TIMEOUT;
}
return VK_SUCCESS;
}
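/*
 * A minimal sketch (illustrative only, not called anywhere in this driver) of
 * the fence lifecycle the entry points above implement: create unsignaled,
 * pass to a submit (which is assumed to associate the queue's seqno with the
 * fence), wait, then reset for reuse. Error handling omitted.
 */
static inline void exampleFenceRoundTrip(VkDevice device, VkQueue queue, const VkSubmitInfo* pSubmit)
{
	VkFenceCreateInfo fci = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
	VkFence fence;
	vkCreateFence(device, &fci, NULL, &fence);
	vkQueueSubmit(queue, 1, pSubmit, fence); //signals the fence on completion
	vkWaitForFences(device, 1, &fence, VK_TRUE, WAIT_TIMEOUT_INFINITE);
	vkResetFences(device, 1, &fence); //back to unsignaled, seqno cleared
	vkDestroyFence(device, fence, NULL);
}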
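//TODO events are not implemented yet; the entry points below are stubs that
//report success without doing anything.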
VKAPI_ATTR void VKAPI_CALL vkCmdWaitEvents(
VkCommandBuffer commandBuffer,
uint32_t eventCount,
const VkEvent* pEvents,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
{
}
VKAPI_ATTR VkResult VKAPI_CALL vkGetEventStatus(
VkDevice device,
VkEvent event)
{
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL vkDestroyEvent(
VkDevice device,
VkEvent event,
const VkAllocationCallbacks* pAllocator)
{
}
VKAPI_ATTR void VKAPI_CALL vkCmdResetEvent(
VkCommandBuffer commandBuffer,
VkEvent event,
VkPipelineStageFlags stageMask)
{
}
VKAPI_ATTR VkResult VKAPI_CALL vkCreateEvent(
VkDevice device,
const VkEventCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkEvent* pEvent)
{
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL vkResetEvent(
VkDevice device,
VkEvent event)
{
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL vkSetEvent(
VkDevice device,
VkEvent event)
{
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL vkCmdSetEvent(
VkCommandBuffer commandBuffer,
VkEvent event,
VkPipelineStageFlags stageMask)
{
}