diff --git a/driver/ConsecutivePoolAllocator.c b/driver/ConsecutivePoolAllocator.c index dbf17bb..6b8bc20 100644 --- a/driver/ConsecutivePoolAllocator.c +++ b/driver/ConsecutivePoolAllocator.c @@ -129,7 +129,7 @@ void consecutivePoolFree(ConsecutivePoolAllocator* pa, void* p, uint32_t numBloc if(counter > 100) { - printf("--------------detected infinite loop nextFreeCandidate: %p, *nextfreecandidate: %p, p: %p\n", nextFreeBlockCandidate, *nextFreeBlockCandidate, p); + fprintf(stderr, "--------------detected infinite loop nextFreeCandidate: %p, *nextfreecandidate: %p, p: %p\n", nextFreeBlockCandidate, *nextFreeBlockCandidate, p); break; } diff --git a/driver/ControlListUtil.c b/driver/ControlListUtil.c index f6e24ca..a150d88 100644 --- a/driver/ControlListUtil.c +++ b/driver/ControlListUtil.c @@ -413,7 +413,6 @@ void clInsertShaderState(ControlList* cl, assert(cl); assert(cl->nextFreeByte); *cl->nextFreeByte = V3D21_GL_SHADER_STATE_opcode; cl->nextFreeByte++; - //TODO is this correct? *(uint32_t*)cl->nextFreeByte = moveBits(address, 28, 4) | moveBits(extendedShaderRecord, 1, 3) | @@ -723,7 +722,6 @@ void clInsertShaderRecord(ControlList* cls, assert(cls); assert(cls->buffer); assert(cls->nextFreeByte); - //TODO is this correct? *cls->nextFreeByte = moveBits(fragmentShaderIsSingleThreaded, 1, 0) | moveBits(pointSizeIncludedInShadedVertexData, 1, 1) | @@ -739,7 +737,7 @@ void clInsertShaderRecord(ControlList* cls, *cls->nextFreeByte = vertexAttributeArraySelectBits; cls->nextFreeByte++; *cls->nextFreeByte = vertexTotalAttributesSize; cls->nextFreeByte++; clEmitShaderRelocation(relocCl, handlesCl, handlesBuf, handlesSize, &vertexCodeAddress); - //TODO wtf??? + //TODO wtf??? --> check kernel side... 
uint32_t offset = moveBits(vertexCodeAddress.offset, 32, 0) | moveBits(vertexUniformsAddress, 32, 0); *(uint32_t*)cls->nextFreeByte = offset; cls->nextFreeByte += 4; cls->nextFreeByte += 4; @@ -767,7 +765,6 @@ void clInsertAttributeRecord(ControlList* cls, assert(cls->buffer); assert(cls->nextFreeByte); uint32_t sizeBytesMinusOne = sizeBytes - 1; - //TODO is this correct? clEmitShaderRelocation(relocCl, handlesCl, handlesBuf, handlesSize, &address); *(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4; *cls->nextFreeByte = sizeBytesMinusOne; cls->nextFreeByte++; diff --git a/driver/ControlListUtil.h b/driver/ControlListUtil.h index a09af5f..d49670d 100644 --- a/driver/ControlListUtil.h +++ b/driver/ControlListUtil.h @@ -35,7 +35,7 @@ typedef struct CLMarker typedef struct ControlList { - uint8_t* buffer; //TODO size? + uint8_t* buffer; uint32_t numBlocks; uint8_t* nextFreeByte; //pointer to the next available free byte CLMarker* currMarker; diff --git a/driver/CustomAssert.h b/driver/CustomAssert.h index 3284664..627df6d 100644 --- a/driver/CustomAssert.h +++ b/driver/CustomAssert.h @@ -19,7 +19,7 @@ __inline__ static void DEBUG_BREAK(void) if( expr ){} \ else \ { \ - printf("Assert failed: %s\n" \ + fprintf(stderr, "Assert failed: %s\n" \ "File: %s\n" \ "Line: %i\n", #expr, __FILE__, __LINE__); \ DEBUG_BREAK(); \ diff --git a/driver/command.c b/driver/command.c index 60cc7de..6cf4dc0 100644 --- a/driver/command.c +++ b/driver/command.c @@ -20,15 +20,11 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool( assert(device); assert(pCreateInfo); - //VK_COMMAND_POOL_CREATE_TRANSIENT_BIT + //TODO VK_COMMAND_POOL_CREATE_TRANSIENT_BIT //specifies that command buffers allocated from the pool will be short-lived, meaning that they will be reset or freed in a relatively short timeframe. //This flag may be used by the implementation to control memory allocation behavior within the pool. 
//--> definitely use pool allocator - //VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT - //allows any command buffer allocated from a pool to be individually reset to the initial state; either by calling vkResetCommandBuffer, or via the implicit reset when calling vkBeginCommandBuffer. - //If this flag is not set on a pool, then vkResetCommandBuffer must not be called for any command buffer allocated from that pool. - //TODO pool family ignored for now _commandPool* cp = ALLOCATE(sizeof(_commandPool), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -40,6 +36,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool( cp->queueFamilyIndex = pCreateInfo->queueFamilyIndex; + cp->resetAble = pCreateInfo->flags & VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + //TODO CTS fails as we can't allocate enough memory for some reason //tweak system allocation as root using: //make sure kernel denies memory allocation that it won't be able to serve @@ -98,6 +96,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers( _commandPool* cp = (_commandPool*)pAllocateInfo->commandPool; + //TODO secondary command buffers + //if(cp->usePoolAllocator) { for(int c = 0; c < pAllocateInfo->commandBufferCount; ++c) @@ -199,15 +199,15 @@ VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer( assert(commandBuffer); assert(pBeginInfo); - //TODO + //TODO secondary command buffers //VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT //specifies that each recording of the command buffer will only be submitted once, and the command buffer will be reset and recorded again between each submission. - //VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT + //TODO VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT //specifies that a secondary command buffer is considered to be entirely inside a render pass. 
If this is a primary command buffer, then this bit is ignored - //VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT + //TODO VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT //specifies that a command buffer can be resubmitted to a queue while it is in the pending state, and recorded into multiple primary command buffers //When a command buffer begins recording, all state in that command buffer is undefined @@ -215,6 +215,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer( commandBuffer->usageFlags = pBeginInfo->flags; commandBuffer->state = CMDBUF_STATE_RECORDING; + //TODO reset state? + return VK_SUCCESS; } @@ -260,13 +262,14 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( { assert(queue); + //TODO this is incorrect + //see sync.c + //TODO: deal with pSubmits->pWaitDstStageMask for(int c = 0; c < pSubmits->waitSemaphoreCount; ++c) { sem_wait((sem_t*)pSubmits->pWaitSemaphores[c]); } - //TODO: deal with pSubmits->pWaitDstStageMask - for(int c = 0; c < pSubmits->commandBufferCount; ++c) { if(pSubmits->pCommandBuffers[c]->state == CMDBUF_STATE_EXECUTABLE) @@ -279,6 +282,12 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( { VkCommandBuffer cmdbuf = pSubmits->pCommandBuffers[c]; + if(!cmdbuf->binCl.currMarker) + { + //no markers recorded yet, skip + continue; + } + //first entry is assumed to be a marker CLMarker* marker = cmdbuf->binCl.buffer; @@ -304,6 +313,9 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( //This should not result in an insertion! uint32_t imageIdx = clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, i->boundMem->bo); + //TODO + //depth/stencil/msaa + //fill out submit cl fields submitCl.color_write.hindex = imageIdx; submitCl.color_write.offset = 0; @@ -389,8 +401,12 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( /**/ - //submit ioctl + //TODO somehow store last finished globally + //so waiting on fences is faster + //eg. 
could be an atomic value static uint64_t lastFinishedSeqno = 0; + + //submit ioctl vc4_cl_submit(controlFd, &submitCl, &queue->lastEmitSeqno, &lastFinishedSeqno); //advance in linked list @@ -418,7 +434,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( sem_post((sem_t*)pSubmits->pSignalSemaphores[c]); } - //TODO is this correct? _fence* f = fence; if(f) { @@ -495,7 +510,9 @@ VKAPI_ATTR void VKAPI_CALL vkTrimCommandPool( _commandPool* cp = commandPool; - //TODO?? + //TODO trim cp's pool allocator and consecutive pool allocator + //by reallocating to just used size + //kinda silly, as if you need memory afterwards we need to reallocate again... } /* @@ -534,8 +551,9 @@ VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandPool( } } - //TODO secondary command buffer stuff - //TODO reset flag + //TODO secondary command buffers + + //TODO reset flag --> free all pool resources } /* @@ -551,6 +569,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandBuffer( assert(cb->state != CMDBUF_STATE_PENDING); + assert(cb->cp->resetAble); + if(cb->state == CMDBUF_STATE_RECORDING || cb->state == CMDBUF_STATE_EXECUTABLE) { cb->state = CMDBUF_STATE_INVALID; @@ -560,7 +580,12 @@ VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandBuffer( cb->state = CMDBUF_STATE_INITIAL; } - //TODO flag? + if(flags & VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT) + { + //TODO release resources + } + + //TODO reset state? 
} VKAPI_ATTR void VKAPI_CALL vkCmdExecuteCommands( @@ -575,5 +600,5 @@ VKAPI_ATTR void VKAPI_CALL vkCmdSetDeviceMask( VkCommandBuffer commandBuffer, uint32_t deviceMask) { - + UNSUPPORTED(vkCmdSetDeviceMask); } diff --git a/driver/common.c b/driver/common.c index 5ad844b..66513c5 100644 --- a/driver/common.c +++ b/driver/common.c @@ -146,8 +146,8 @@ uint32_t getFormatBpp(VkFormat f) case VK_FORMAT_UNDEFINED: //TODO return 8; default:// - printf("format %i\n", f); - assert(0); + fprintf(stderr, "format %i\n", f); + assert(!"Unknown format."); return 0; } } @@ -278,7 +278,8 @@ void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp, } default: { - assert(0); //unsupported + fprintf(stderr, "bpp: %i\n", bpp); + assert(!"Unsupported texture bpp."); } } @@ -528,7 +529,7 @@ void clDump(void* cl, uint32_t size) uint32_t length; if (inst == NULL) { - printf("0x%08x 0x%08x: Unknown packet 0x%02x (%d)!\n", + fprintf(stderr, "0x%08x 0x%08x: Unknown packet 0x%02x (%d)!\n", offset, hw_offset, header, header); return; } @@ -664,8 +665,8 @@ uint8_t getTextureDataType(VkFormat format) case VK_FORMAT_UNDEFINED: //TODO return -1; default:// - printf("unsupported format %i\n", format); - assert(0); + fprintf(stderr, "format %i\n", format); + assert(!"Unsupported format."); return -1; } } @@ -726,8 +727,8 @@ uint8_t getWrapMode(VkSamplerAddressMode mode) } else { - printf("unsupported wrap mode: %i\n", mode); - assert(0); + fprintf(stderr, "wrap mode: %i\n", mode); + assert(!"Unsupported wrap mode."); return -1; } } @@ -743,16 +744,15 @@ uint32_t getRenderTargetFormatVC4(VkFormat format) case VK_FORMAT_B5G6R5_UNORM_PACK16: return VC4_RENDER_CONFIG_FORMAT_BGR565; default: - printf("unsupported rendertarget format: %i\n", format); - assert(0); + fprintf(stderr, "rendertarget format: %i\n", format); + assert(!"Unsupported render target format"); return -1; } } //////////////////////////////////////////////////// //////////////////////////////////////////////////// 
-/// just so we can return a function pointer, TODO -//////////////////////////////////////////////////// +/// just so we can return a function pointer //////////////////////////////////////////////////// //////////////////////////////////////////////////// @@ -761,7 +761,7 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalBufferProperties( const VkPhysicalDeviceExternalBufferInfo* pExternalBufferInfo, VkExternalBufferProperties* pExternalBufferProperties) { - + UNSUPPORTED(vkGetPhysicalDeviceExternalBufferProperties); } VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalFenceProperties( @@ -769,7 +769,7 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalFenceProperties( const VkPhysicalDeviceExternalFenceInfo* pExternalFenceInfo, VkExternalFenceProperties* pExternalFenceProperties) { - + UNSUPPORTED(vkGetPhysicalDeviceExternalFenceProperties); } @@ -778,7 +778,7 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalSemaphoreProperties( const VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo, VkExternalSemaphoreProperties* pExternalSemaphoreProperties) { - + UNSUPPORTED(vkGetPhysicalDeviceExternalSemaphoreProperties); } VKAPI_ATTR void VKAPI_CALL vkGetDeviceGroupPeerMemoryFeatures( @@ -788,5 +788,5 @@ VKAPI_ATTR void VKAPI_CALL vkGetDeviceGroupPeerMemoryFeatures( uint32_t remoteDeviceIndex, VkPeerMemoryFeatureFlags* pPeerMemoryFeatures) { - + UNSUPPORTED(vkGetDeviceGroupPeerMemoryFeatures); } diff --git a/driver/common.h b/driver/common.h index b0494c6..cf4f5c4 100644 --- a/driver/common.h +++ b/driver/common.h @@ -47,6 +47,8 @@ VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE #define ALLOCATE(size, alignment, scope) (pAllocator == 0) ? malloc(size) : pAllocator->pfnAllocation(pAllocator->pUserData, size, alignment, scope) #define FREE(memory) (pAllocator == 0) ? 
free(memory) : pAllocator->pfnFree(pAllocator->pUserData, memory) +#define UNSUPPORTED(str) do { fprintf(stderr, "Unsupported: %s\n", #str); exit(-1); } while(0) + typedef struct VkDevice_T _device; typedef struct VkQueue_T @@ -60,6 +62,7 @@ typedef struct VkCommandPool_T PoolAllocator pa; ConsecutivePoolAllocator cpa; uint32_t queueFamilyIndex; + uint32_t resetAble; } _commandPool; typedef enum commandBufferState diff --git a/driver/compute.c b/driver/compute.c index e307c0a..07339c0 100644 --- a/driver/compute.c +++ b/driver/compute.c @@ -1,6 +1,7 @@ #include "common.h" //TODO +//compute shaders need kernel support VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines( VkDevice device, @@ -10,6 +11,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines( const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines) { + UNSUPPORTED(vkCreateComputePipelines); return VK_SUCCESS; } @@ -18,7 +20,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDispatchIndirect( VkBuffer buffer, VkDeviceSize offset) { - + UNSUPPORTED(vkCmdDispatchIndirect); } VKAPI_ATTR void VKAPI_CALL vkCmdDispatch( @@ -27,7 +29,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDispatch( uint32_t groupCountY, uint32_t groupCountZ) { - + UNSUPPORTED(vkCmdDispatch); } VKAPI_ATTR void VKAPI_CALL vkCmdDispatchBase( @@ -39,5 +41,5 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDispatchBase( uint32_t groupCountY, uint32_t groupCountZ) { - + UNSUPPORTED(vkCmdDispatchBase); } diff --git a/driver/descriptorSet.c b/driver/descriptorSet.c index 7ee5066..032c35c 100644 --- a/driver/descriptorSet.c +++ b/driver/descriptorSet.c @@ -1,7 +1,5 @@ #include "common.h" -//TODO - VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorPool( VkDevice device, const VkDescriptorPoolCreateInfo* pCreateInfo, diff --git a/driver/kernelInterface.c b/driver/kernelInterface.c index e329d47..2982422 100644 --- a/driver/kernelInterface.c +++ b/driver/kernelInterface.c @@ -12,7 +12,7 @@ int openIoctl() { controlFd = open(DRM_IOCTL_CTRL_DEV_FILE_NAME, O_RDWR | O_CLOEXEC); if (controlFd
< 0) { - printf("Can't open device file: %s \nError: %s\n", DRM_IOCTL_CTRL_DEV_FILE_NAME, strerror(errno)); + fprintf(stderr, "Can't open device file: %s \nError: %s\n", DRM_IOCTL_CTRL_DEV_FILE_NAME, strerror(errno)); return -1; } } @@ -72,14 +72,14 @@ int vc4_get_chip_info(int fd) */ return 21; } else { - printf("Couldn't get V3D IDENT0: %s\n", + fprintf(stderr, "Couldn't get V3D IDENT0: %s\n", strerror(errno)); return 0; } } ret = drmIoctl(fd, DRM_IOCTL_VC4_GET_PARAM, &ident1); if (ret != 0) { - printf("Couldn't get V3D IDENT1: %s\n", + fprintf(stderr, "Couldn't get V3D IDENT1: %s\n", strerror(errno)); return 0; } @@ -89,7 +89,7 @@ int vc4_get_chip_info(int fd) uint32_t v3d_ver = major * 10 + minor; if (v3d_ver != 21 && v3d_ver != 26) { - printf("V3D %d.%d not supported.\n", + fprintf(stderr, "V3D %d.%d not supported.\n", v3d_ver / 10, v3d_ver % 10); return 0; @@ -109,7 +109,7 @@ int vc4_has_feature(int fd, uint32_t feature) if (ret != 0) { - printf("Couldn't determine if VC4 has feature: %s\n", strerror(errno)); + fprintf(stderr, "Couldn't determine if VC4 has feature: %s\n", strerror(errno)); return 0; } @@ -151,7 +151,7 @@ uint64_t vc4_bo_get_tiling(int fd, uint32_t bo, uint64_t mod) } else if (mod == DRM_FORMAT_MOD_INVALID) { return get_tiling.modifier; } else if (mod != get_tiling.modifier) { - printf("Modifier 0x%llx vs. tiling (0x%llx) mismatch\n", + fprintf(stderr, "Modifier 0x%llx vs. 
tiling (0x%llx) mismatch\n", (long long)mod, get_tiling.modifier); return -1; } @@ -173,7 +173,7 @@ int vc4_bo_set_tiling(int fd, uint32_t bo, uint64_t mod) &set_tiling); if (ret != 0) { - printf("Couldn't set tiling: %s\n", + fprintf(stderr, "Couldn't set tiling: %s\n", strerror(errno)); return 0; } @@ -197,14 +197,14 @@ void* vc4_bo_map_unsynchronized(int fd, uint32_t bo, uint32_t offset, uint32_t s map.handle = bo; ret = drmIoctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map); if (ret != 0) { - printf("Couldn't map unsync: %s\n", strerror(errno)); + fprintf(stderr, "Couldn't map unsync: %s\n", strerror(errno)); return 0; } void* mapPtr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, map.offset + offset); if (mapPtr == MAP_FAILED) { - printf("mmap of bo %d (offset 0x%016llx, size %d) failed\n", + fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n", bo, (long long)map.offset + offset, size); return 0; } @@ -237,7 +237,7 @@ int vc4_bo_wait(int fd, uint32_t bo, uint64_t timeout_ns) int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait); if (ret) { if (ret != -ETIME) { - printf("BO wait failed: %s\n", + fprintf(stderr, "BO wait failed: %s\n", strerror(errno)); } @@ -269,7 +269,7 @@ int vc4_seqno_wait(int fd, uint64_t* lastFinishedSeqno, uint64_t seqno, uint64_t int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait); if (ret) { if (ret != -ETIME) { - printf("Seqno wait failed: %s\n", + fprintf(stderr, "Seqno wait failed: %s\n", strerror(errno)); } else @@ -298,7 +298,7 @@ int vc4_bo_flink(int fd, uint32_t bo, uint32_t *name) }; int ret = drmIoctl(fd, DRM_IOCTL_GEM_FLINK, &flink); if (ret) { - printf("Failed to flink bo %d: %s\n", + fprintf(stderr, "Failed to flink bo %d: %s\n", bo, strerror(errno)); //free(bo); return 0; @@ -334,7 +334,7 @@ uint32_t vc4_bo_alloc_shader(int fd, const void *data, uint32_t* size) &create); if (ret != 0) { - printf("Couldn't create shader: %s\n", + fprintf(stderr, "Couldn't create shader: %s\n", strerror(errno)); return 0; } 
@@ -354,7 +354,7 @@ uint32_t vc4_bo_open_name(int fd, uint32_t name) }; int ret = drmIoctl(fd, DRM_IOCTL_GEM_OPEN, &o); if (ret) { - printf("Failed to open bo %d: %s\n", + fprintf(stderr, "Failed to open bo %d: %s\n", name, strerror(errno)); return 0; } @@ -387,7 +387,7 @@ uint32_t vc4_bo_alloc(int fd, uint32_t size, const char *name) uint32_t handle = create.handle; if (ret != 0) { - printf("Couldn't alloc BO: %s\n", + fprintf(stderr, "Couldn't alloc BO: %s\n", strerror(errno)); /*if (!list_empty(&screen->bo_cache.time_list) && @@ -423,7 +423,7 @@ void vc4_bo_free(int fd, uint32_t bo, void* mappedAddr, uint32_t size) int ret = drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &c); if (ret != 0) { - printf("close object %d: %s\n", bo, strerror(errno)); + fprintf(stderr, "couldn't close object %d: %s\n", bo, strerror(errno)); } } @@ -442,7 +442,7 @@ int vc4_bo_unpurgeable(int fd, uint32_t bo, int hasMadvise) if (drmIoctl(fd, DRM_IOCTL_VC4_GEM_MADVISE, &arg)) { - printf("Unpurgable BO madvise failed: %s\n", + fprintf(stderr, "Unpurgable BO madvise failed: %s\n", strerror(errno)); return 0; } @@ -465,7 +465,7 @@ void vc4_bo_purgeable(int fd, uint32_t bo, int hasMadvise) int ret = drmIoctl(fd, DRM_IOCTL_VC4_GEM_MADVISE, &arg); if(ret) { - printf("Purgable BO madvise failed: %s\n", + fprintf(stderr, "Purgable BO madvise failed: %s\n", strerror(errno)); } } @@ -489,7 +489,7 @@ void vc4_bo_label(int fd, uint32_t bo, const char* name) int ret = drmIoctl(fd, DRM_IOCTL_VC4_LABEL_BO, &label); if(ret) { - printf("BO label failed: %s\n", + fprintf(stderr, "BO label failed: %s\n", strerror(errno)); } } @@ -503,7 +503,7 @@ int vc4_bo_get_dmabuf(int fd, uint32_t bo) int ret = drmPrimeHandleToFD(fd, bo, O_CLOEXEC, &boFd); if (ret != 0) { - printf("Failed to export gem bo %d to dmabuf: %s\n", + fprintf(stderr, "Failed to export gem bo %d to dmabuf: %s\n", bo, strerror(errno)); return 0; } @@ -522,7 +522,7 @@ void* vc4_bo_map(int fd, uint32_t bo, uint32_t offset, uint32_t size) //wait infinitely int 
ok = vc4_bo_wait(fd, bo, WAIT_TIMEOUT_INFINITE); if (!ok) { - printf("BO wait for map failed: %s\n", strerror(errno)); + fprintf(stderr, "BO wait for map failed: %s\n", strerror(errno)); return 0; } @@ -540,7 +540,7 @@ void vc4_cl_submit(int fd, struct drm_vc4_submit_cl* submit, uint64_t* lastEmitt static int warned = 0; if (ret && !warned) { - printf("Draw call returned %s. " + fprintf(stderr, "Draw call returned %s. " "Expect corruption.\n", strerror(errno)); warned = 1; } else if (!ret) { @@ -554,7 +554,7 @@ void vc4_cl_submit(int fd, struct drm_vc4_submit_cl* submit, uint64_t* lastEmitt *lastFinishedSeqno > 0 ? *lastEmittedSeqno - 5 : *lastEmittedSeqno, &timeout)) { - printf("Job throttling failed\n"); + fprintf(stderr, "Job throttling failed\n"); } } } diff --git a/driver/modeset.c b/driver/modeset.c index c1a08ba..fcde64d 100644 --- a/driver/modeset.c +++ b/driver/modeset.c @@ -98,7 +98,7 @@ modeset_dev* modeset_create(int fd) // retrieve resources res = drmModeGetResources(fd); if (!res) { - printf("cannot retrieve DRM resources (%d): %m\n", errno); + fprintf(stderr, "cannot retrieve DRM resources (%d): %m\n", errno); return 0; } @@ -107,7 +107,7 @@ modeset_dev* modeset_create(int fd) // get information for each connector conn = drmModeGetConnector(fd, res->connectors[i]); if (!conn) { - printf("cannot retrieve DRM connector %u:%u (%d): %m\n", i, res->connectors[i], errno); + fprintf(stderr, "cannot retrieve DRM connector %u:%u (%d): %m\n", i, res->connectors[i], errno); continue; } @@ -121,7 +121,7 @@ modeset_dev* modeset_create(int fd) if (ret) { if (ret != -ENOENT) { errno = -ret; - printf("cannot setup device for connector %u:%u (%d): %m\n", i, res->connectors[i], errno); + fprintf(stderr, "cannot setup device for connector %u:%u (%d): %m\n", i, res->connectors[i], errno); } free(dev); drmModeFreeConnector(conn); @@ -151,7 +151,7 @@ int modeset_fb_for_dev(int fd, modeset_dev* dev, _image* buffer) ret = drmModeSetCrtc(fd, iter->crtc, buffer->fb, 0, 0, 
&iter->conn, 1, &iter->mode); if (ret) - printf("cannot set CRTC for connector %u (%d): %m\n", + fprintf(stderr, "cannot set CRTC for connector %u (%d): %m\n", iter->conn, errno); } @@ -173,14 +173,14 @@ static int modeset_setup_dev(int fd, drmModeRes *res, drmModeConnector *conn, // check if a monitor is connected if (conn->connection != DRM_MODE_CONNECTED) { - printf("ignoring unused connector %u\n", + fprintf(stderr, "ignoring unused connector %u\n", conn->connector_id); return -ENOENT; } // check if there is at least one valid mode if (conn->count_modes == 0) { - printf("no valid mode for connector %u\n", + fprintf(stderr, "no valid mode for connector %u\n", conn->connector_id); return -EFAULT; } @@ -195,7 +195,7 @@ static int modeset_setup_dev(int fd, drmModeRes *res, drmModeConnector *conn, // find a crtc for this connector ret = modeset_find_crtc(fd, res, conn, dev); if (ret) { - printf("no valid crtc for connector %u\n", + fprintf(stderr, "no valid crtc for connector %u\n", conn->connector_id); return ret; } @@ -273,7 +273,7 @@ static int modeset_find_crtc(int fd, drmModeRes *res, drmModeConnector *conn, for (i = 0; i < conn->count_encoders; ++i) { enc = drmModeGetEncoder(fd, conn->encoders[i]); if (!enc) { - printf("cannot retrieve encoder %u:%u (%d): %m\n", + fprintf(stderr, "cannot retrieve encoder %u:%u (%d): %m\n", i, conn->encoders[i], errno); continue; } @@ -304,7 +304,7 @@ static int modeset_find_crtc(int fd, drmModeRes *res, drmModeConnector *conn, drmModeFreeEncoder(enc); } - printf("cannot find suitable CRTC for connector %u\n", + fprintf(stderr, "cannot find suitable CRTC for connector %u\n", conn->connector_id); return -ENOENT; } @@ -343,7 +343,7 @@ int modeset_create_fb(int fd, _image *buf) ret = drmModeAddFB(fd, buf->width, buf->height, 24, 32, buf->stride, buf->boundMem->bo, &buf->fb); if (ret) { - printf("cannot create framebuffer (%d): %m\n", + fprintf(stderr, "cannot create framebuffer (%d): %m\n", errno); ret = -errno; @@ -469,7 +469,7 
@@ void modeset_present_buffer(int fd, modeset_dev* dev, _image* buffer) ret = drmModeSetCrtc(fd, iter->crtc, buffer->fb, 0, 0, &iter->conn, 1, &iter->mode); if (ret) - printf("cannot flip CRTC for connector %u (%d): %m\n", + fprintf(stderr, "cannot flip CRTC for connector %u (%d): %m\n", iter->conn, errno); //else // iter->front_buf ^= 1; diff --git a/driver/sync.c b/driver/sync.c index 521512f..cdc7cd2 100644 --- a/driver/sync.c +++ b/driver/sync.c @@ -2,6 +2,41 @@ #include "kernel/vc4_packet.h" +//----------------------------- +//Semaphore vs Fence: +// Semaphore is GPU to GPU sync +// Fence is GPU to CPU sync +// Both are signalled by the GPU +// Both are multi-queue +// But Fence can be waited on by the CPU +// Semaphore can only be waited on by the GPU +// +//Events are general: they can be signalled by the CPU or the GPU +// But can only be waited on by the GPU +// Limited to a single queue +// +//TODO as a result the current semaphore +//implementation is wrong +//maybe use: +//clInsertWaitOnSemaphore +//clInsertIncrementSemaphore +// +//seems like each binCL needs to end with increment semaphore +//signalling that binning is done +//and each renderCL starts with a wait semaphore (to wait for binning) +// +//in theory we could add a wait for semaphore to the start of a binCL +//and an increment semaphore either to the end of another binCL or renderCL +//but we can't control renderCLs as the kernel side creates those... +// +//also there's only one such semaphore, and in Vulkan you can have many +//and should only signal those selected +//so maybe we could emulate this in shaders? +//ie. stall shader until a value is something? +//and increment said value? +//but we'd need to patch shaders and it'd probably be slow...
+//----------------------------- + /* * https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSemaphore * Semaphores are a synchronization primitive that can be used to insert a dependency between batches submitted to queues. diff --git a/test/texturing/texturing.cpp b/test/texturing/texturing.cpp index 3153418..d753bf7 100644 --- a/test/texturing/texturing.cpp +++ b/test/texturing/texturing.cpp @@ -119,7 +119,7 @@ char* readPPM(const char* fileName) uint16_t magic_number = ((uint16_t*)buf)[0]; if(magic_number != ppm_magic) { - printf("PPM magic number not found: %u\n", magic_number); + fprintf(stderr, "PPM magic number not found: %u\n", magic_number); return 0; }