diff --git a/driver/ControlListUtil.c b/driver/ControlListUtil.c index a79e049..f892c16 100644 --- a/driver/ControlListUtil.c +++ b/driver/ControlListUtil.c @@ -56,9 +56,9 @@ void clInsertNewCLMarker(ControlList* cl, CLMarker marker = {}; marker.memGuard = 0xDDDDDDDD; - marker.handlesBufOffset = handlesCL->offset; - marker.shaderRecBufOffset = shaderRecCL->offset; - marker.uniformsBufOffset = uniformsCL->offset; + marker.handlesBufOffset = 0; + marker.shaderRecBufOffset = 0; + marker.uniformsBufOffset = 0; marker.nextMarkerOffset = -1; //close current marker @@ -95,9 +95,9 @@ void clCloseCurrentMarker(ControlList* cl, ControlList* handlesCL, ControlList* assert(uniformsCL); CLMarker* currMarker = getCPAptrFromOffset(cl->CPA, cl->currMarkerOffset); currMarker->size = cl->nextFreeByteOffset - (cl->currMarkerOffset + sizeof(CLMarker)); - currMarker->handlesSize = handlesCL->nextFreeByteOffset - currMarker->handlesBufOffset; - currMarker->shaderRecSize = shaderRecCL->nextFreeByteOffset - currMarker->shaderRecBufOffset; - currMarker->uniformsSize = uniformsCL->nextFreeByteOffset - currMarker->uniformsBufOffset; + currMarker->handlesSize = handlesCL->nextFreeByteOffset - (currMarker->handlesBufOffset + handlesCL->offset); + currMarker->shaderRecSize = shaderRecCL->nextFreeByteOffset - (currMarker->shaderRecBufOffset + shaderRecCL->offset); + currMarker->uniformsSize = uniformsCL->nextFreeByteOffset - (currMarker->uniformsBufOffset + uniformsCL->offset); currMarker->shaderRecCount = shaderRecCount - currMarker->shaderRecCount; //update shader rec count to reflect added shader recs } @@ -789,6 +789,8 @@ uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handlesOffset, uint32 *(uint32_t*)getCPAptrFromOffset(handlesCl->CPA, handlesCl->nextFreeByteOffset) = handle; handlesCl->nextFreeByteOffset += 4; + assert(handlesCl->nextFreeByteOffset < handlesCl->offset + handlesCl->blockSize * handlesCl->numBlocks); + return c; } diff --git a/driver/ControlListUtil.h 
b/driver/ControlListUtil.h index d71a3fe..9e49595 100644 --- a/driver/ControlListUtil.h +++ b/driver/ControlListUtil.h @@ -45,7 +45,7 @@ typedef struct CLMarker //pointers that point to where all the other CL data is //plus sizes //uint8_t* handlesBuf; // - uint32_t handlesBufOffset; + uint32_t handlesBufOffset; //relative offset, because underlying buffer could be moved uint32_t handlesSize; //uint8_t* shaderRecBuf; // uint32_t shaderRecBufOffset; diff --git a/driver/command.c b/driver/command.c index 1f37415..657d6bb 100644 --- a/driver/command.c +++ b/driver/command.c @@ -49,8 +49,8 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateCommandPool( //initial number of command buffers to hold int numCommandBufs = 128; //TODO uniforms might need to realloc, which should be handled properly - int consecutiveBlockSize = ARM_PAGE_SIZE;// * 20; - int consecutiveBlockNumber = 128; + int consecutiveBlockSize = ARM_PAGE_SIZE * 20; + int consecutiveBlockNumber = 256; //int numCommandBufs = 30; //int consecutiveBlockSize = getCPABlockSize(256); //int consecutiveBlockNumber = 30; @@ -329,12 +329,13 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( uint32_t readMSAAdepthStencilImage = marker->readMSAAdepthStencilImage; //This should not result in an insertion! - uint32_t writeImageIdx = writeImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset, marker->handlesSize, writeImage->boundMem->bo) : 0; - uint32_t readImageIdx = readImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset, marker->handlesSize, readImage->boundMem->bo) : 0; - uint32_t writeDepthStencilImageIdx = writeDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset, marker->handlesSize, writeDepthStencilImage->boundMem->bo) : 0; - uint32_t readDepthStencilImageIdx = readDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset, marker->handlesSize, readDepthStencilImage->boundMem->bo) : 0; - uint32_t writeMSAAimageIdx = writeMSAAimage ? 
clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, marker->handlesBufOffset, writeMSAAimage->boundMem->bo) : 0; - uint32_t writeMSAAdepthStencilImageIdx = writeMSAAdepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset, marker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo) : 0; + clFit(cmdbuf, &cmdbuf->handlesCl, 4 * 6); //just to be safe + uint32_t writeImageIdx = writeImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset + cmdbuf->handlesCl.offset, marker->handlesSize, writeImage->boundMem->bo) : 0; + uint32_t readImageIdx = readImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset + cmdbuf->handlesCl.offset, marker->handlesSize, readImage->boundMem->bo) : 0; + uint32_t writeDepthStencilImageIdx = writeDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset + cmdbuf->handlesCl.offset, marker->handlesSize, writeDepthStencilImage->boundMem->bo) : 0; + uint32_t readDepthStencilImageIdx = readDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset + cmdbuf->handlesCl.offset, marker->handlesSize, readDepthStencilImage->boundMem->bo) : 0; + uint32_t writeMSAAimageIdx = writeMSAAimage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset + cmdbuf->handlesCl.offset, marker->handlesSize, writeMSAAimage->boundMem->bo) : 0; + uint32_t writeMSAAdepthStencilImageIdx = writeMSAAdepthStencilImage ?
clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset + cmdbuf->handlesCl.offset, marker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo) : 0; // fprintf(stderr, "writeImage: %u\n", writeImage); // fprintf(stderr, "readImage: %u\n", readImage); @@ -504,10 +505,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( submitCl.height = height; submitCl.flags |= marker->flags; - submitCl.bo_handles = getCPAptrFromOffset(cmdbuf->handlesCl.CPA, marker->handlesBufOffset); + submitCl.bo_handles = getCPAptrFromOffset(cmdbuf->handlesCl.CPA, marker->handlesBufOffset + cmdbuf->handlesCl.offset); submitCl.bin_cl = ((uint8_t*)marker) + sizeof(CLMarker); - submitCl.shader_rec = getCPAptrFromOffset(cmdbuf->shaderRecCl.CPA, marker->shaderRecBufOffset); - submitCl.uniforms = getCPAptrFromOffset(cmdbuf->uniformsCl.CPA, marker->uniformsBufOffset); + submitCl.shader_rec = getCPAptrFromOffset(cmdbuf->shaderRecCl.CPA, marker->shaderRecBufOffset + cmdbuf->shaderRecCl.offset); + submitCl.uniforms = getCPAptrFromOffset(cmdbuf->uniformsCl.CPA, marker->uniformsBufOffset + cmdbuf->uniformsCl.offset); if(marker->perfmonID) { @@ -542,15 +543,15 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( printf("BO handles: "); for(int d = 0; d < marker->handlesSize / 4; ++d) { - printf("%u ", *(((uint32_t*)getCPAptrFromOffset(cmdbuf->handlesCl.CPA, marker->handlesBufOffset))+d)); + printf("%u ", *(((uint32_t*)getCPAptrFromOffset(cmdbuf->handlesCl.CPA, marker->handlesBufOffset + cmdbuf->handlesCl.offset))+d)); } printf("\nUniforms: "); for(int d = 0; d < marker->uniformsSize / 4; ++d) { - printf("%i ", *(((uint32_t*)getCPAptrFromOffset(cmdbuf->uniformsCl.CPA, marker->uniformsBufOffset))+d)); + printf("%i ", *(((uint32_t*)getCPAptrFromOffset(cmdbuf->uniformsCl.CPA, marker->uniformsBufOffset + cmdbuf->uniformsCl.offset))+d)); } printf("\nShader recs: "); - uint8_t* ptr = getCPAptrFromOffset(cmdbuf->shaderRecCl.CPA, marker->shaderRecBufOffset + (3 + 3) * 4); + uint8_t* ptr = 
getCPAptrFromOffset(cmdbuf->shaderRecCl.CPA, marker->shaderRecBufOffset + cmdbuf->shaderRecCl.offset + (3 + 3) * 4); for(int d = 0; d < marker->shaderRecCount; ++d) { uint8_t flags = *ptr; diff --git a/driver/draw.c b/driver/draw.c index 595a369..6ab1978 100644 --- a/driver/draw.c +++ b/driver/draw.c @@ -184,7 +184,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) //TODO commandBuffer->shaderRecCount++; - clFit(commandBuffer, &commandBuffer->shaderRecCl, V3D21_SHADER_RECORD_length); + clFit(commandBuffer, &commandBuffer->shaderRecCl, 12 * sizeof(uint32_t) + 104 + 8 * 32); ControlList relocCl = commandBuffer->shaderRecCl; uint32_t attribCount = 0; @@ -229,10 +229,14 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) { clInsertNop(&commandBuffer->shaderRecCl); } + + assert(((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->memGuard == 0xDDDDDDDD); + + clFit(commandBuffer, &commandBuffer->handlesCl, (3 + 8)*4); clInsertShaderRecord(&commandBuffer->shaderRecCl, &relocCl, &commandBuffer->handlesCl, - ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, + ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, !fragModule->hasThreadSwitch, 0, //TODO point size included in shaded vertex data? 
@@ -318,11 +322,10 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) + cb->vertexBuffers[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding]->boundOffset, }; - clFit(commandBuffer, &commandBuffer->shaderRecCl, V3D21_ATTRIBUTE_RECORD_length); clInsertAttributeRecord(&commandBuffer->shaderRecCl, &relocCl, &commandBuffer->handlesCl, - ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, + ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, vertexBuffer, //reloc address formatByteSize, @@ -363,7 +366,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) //emit reloc for texture BO clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, di->imageView->image->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, di->imageView->image->boundMem->bo); //emit tex bo reloc index clFit(commandBuffer, &commandBuffer->uniformsCl, 4); @@ -382,7 +385,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) //emit reloc for BO clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, 
db->buffer->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, db->buffer->boundMem->bo); //emit bo reloc index clFit(commandBuffer, &commandBuffer->uniformsCl, 4); @@ -399,7 +402,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) //emit reloc for BO clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, dtb->bufferView->buffer->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, dtb->bufferView->buffer->boundMem->bo); //emit bo reloc index clFit(commandBuffer, &commandBuffer->uniformsCl, 4); @@ -614,7 +617,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdDrawIndexed( assert(((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->memGuard == 0xDDDDDDDD); clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, cb->indexBuffer->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, 
cb->binCl.currMarkerOffset))->handlesSize, cb->indexBuffer->boundMem->bo); clInsertGEMRelocations(&commandBuffer->binCl, idx, 0); diff --git a/driver/renderpass.c b/driver/renderpass.c index 49258c3..1236b10 100644 --- a/driver/renderpass.c +++ b/driver/renderpass.c @@ -194,37 +194,37 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB if(writeImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, writeImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, writeImage->boundMem->bo); } if(readImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, readImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, readImage->boundMem->bo); } if(writeDepthStencilImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, writeDepthStencilImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, 
((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, writeDepthStencilImage->boundMem->bo); } if(readDepthStencilImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, readDepthStencilImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, readDepthStencilImage->boundMem->bo); } if(writeMSAAimage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, writeMSAAimage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, writeMSAAimage->boundMem->bo); } if(writeMSAAdepthStencilImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesBufOffset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, writeMSAAdepthStencilImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, 
cb->binCl.currMarkerOffset))->handlesBufOffset + cb->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset))->handlesSize, writeMSAAdepthStencilImage->boundMem->bo); } uint32_t bpp = 0; diff --git a/driver/stateChange.c b/driver/stateChange.c index f7ea647..0d290bc 100644 --- a/driver/stateChange.c +++ b/driver/stateChange.c @@ -517,7 +517,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdClearColorImage( //insert reloc for render target clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(commandBuffer->binCl.CPA, commandBuffer->binCl.currMarkerOffset))->handlesBufOffset, ((CLMarker*)getCPAptrFromOffset(commandBuffer->binCl.CPA, commandBuffer->binCl.currMarkerOffset))->handlesSize, i->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, ((CLMarker*)getCPAptrFromOffset(commandBuffer->binCl.CPA, commandBuffer->binCl.currMarkerOffset))->handlesBufOffset + commandBuffer->handlesCl.offset, ((CLMarker*)getCPAptrFromOffset(commandBuffer->binCl.CPA, commandBuffer->binCl.currMarkerOffset))->handlesSize, i->boundMem->bo); clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length); clInsertTileBinningModeConfiguration(&commandBuffer->binCl,