diff --git a/driver/ControlListUtil.c b/driver/ControlListUtil.c index 1ec263f..6482c93 100644 --- a/driver/ControlListUtil.c +++ b/driver/ControlListUtil.c @@ -696,7 +696,8 @@ void clInsertGEMRelocations(ControlList* cl, void clInsertShaderRecord(ControlList* cls, ControlList* relocCl, ControlList* handlesCl, - uint8_t* handlesBuf, uint32_t handlesSize, + uint32_t handlesOffset, + uint32_t handlesSize, uint32_t fragmentShaderIsSingleThreaded, //0/1 uint32_t pointSizeIncludedInShadedVertexData, //0/1 uint32_t enableClipping, //0/1 @@ -724,14 +725,14 @@ void clInsertShaderRecord(ControlList* cls, *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = 0; cls->nextFreeByteOffset++; *(uint16_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = moveBits(fragmentNumberOfUsedUniforms, 16, 0); cls->nextFreeByteOffset++; *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) |= fragmentNumberOfVaryings; cls->nextFreeByteOffset++; - clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &fragmentCodeAddress); + clEmitShaderRelocation(relocCl, handlesCl, handlesOffset, handlesSize, &fragmentCodeAddress); *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = fragmentCodeAddress.offset; cls->nextFreeByteOffset += 4; *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = fragmentUniformsAddress; cls->nextFreeByteOffset += 4; *(uint16_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = moveBits(vertexNumberOfUsedUniforms, 16, 0); cls->nextFreeByteOffset += 2; *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = vertexAttributeArraySelectBits; cls->nextFreeByteOffset++; *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = vertexTotalAttributesSize; cls->nextFreeByteOffset++; - clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &vertexCodeAddress); + clEmitShaderRelocation(relocCl, handlesCl, handlesOffset, handlesSize, &vertexCodeAddress); //wtf??? 
--> shader code will always have an offset of 0 so this is fine uint32_t offset = moveBits(vertexCodeAddress.offset, 32, 0) | moveBits(vertexUniformsAddress, 32, 0); *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = offset; cls->nextFreeByteOffset += 4; @@ -740,7 +741,7 @@ void clInsertShaderRecord(ControlList* cls, *(uint16_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = moveBits(coordinateNumberOfUsedUniforms, 16, 0); cls->nextFreeByteOffset += 2; *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateAttributeArraySelectBits; cls->nextFreeByteOffset++; *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateTotalAttributesSize; cls->nextFreeByteOffset++; - clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &coordinateCodeAddress); + clEmitShaderRelocation(relocCl, handlesCl, handlesOffset, handlesSize, &coordinateCodeAddress); *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateCodeAddress.offset; cls->nextFreeByteOffset += 4; *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateUniformsAddress; cls->nextFreeByteOffset += 4; } @@ -749,7 +750,7 @@ void clInsertShaderRecord(ControlList* cls, void clInsertAttributeRecord(ControlList* cls, ControlList* relocCl, ControlList* handlesCl, - uint8_t* handlesBuf, uint32_t handlesSize, + uint32_t handlesOffset, uint32_t handlesSize, ControlListAddress address, uint32_t sizeBytes, uint32_t stride, @@ -759,7 +760,7 @@ void clInsertAttributeRecord(ControlList* cls, assert(cls); assert(cls->CPA); uint32_t sizeBytesMinusOne = sizeBytes - 1; - clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &address); + clEmitShaderRelocation(relocCl, handlesCl, handlesOffset, handlesSize, &address); *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = address.offset; cls->nextFreeByteOffset += 4; *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = sizeBytesMinusOne; 
cls->nextFreeByteOffset++; *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = stride; cls->nextFreeByteOffset++; @@ -767,16 +768,16 @@ void clInsertAttributeRecord(ControlList* cls, *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateVPMOffset; cls->nextFreeByteOffset++; } -uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handlesSize, uint32_t handle) +uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handlesOffset, uint32_t handlesSize, uint32_t handle) { uint32_t c = 0; //if curr marker is closed already we need to work with the stored size - uint32_t numHandles = (handlesSize ? handlesSize : (handlesCl->nextFreeByteOffset - handlesCl->offset)) / 4; + uint32_t numHandles = (handlesSize ? handlesSize : (handlesCl->nextFreeByteOffset - handlesOffset)) / 4; for(; c < numHandles; ++c) { - if(((uint32_t*)getCPAptrFromOffset(handlesCl->CPA, handlesCl->offset))[c] == handle) + if(((uint32_t*)getCPAptrFromOffset(handlesCl->CPA, handlesOffset))[c] == handle) { //found return c; @@ -791,7 +792,7 @@ uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handlesSize, uint32_t } //input: 2 cls (cl + handles cl) -inline void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint32_t handlesSize, const ControlListAddress* address) +inline void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint32_t handlesOffset, uint32_t handlesSize, const ControlListAddress* address) { assert(relocCl); assert(relocCl->CPA); @@ -801,7 +802,7 @@ inline void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, assert(address->handle); //store offset within handles in cl - *(uint32_t*)getCPAptrFromOffset(relocCl->CPA, relocCl->nextFreeByteOffset) = clGetHandleIndex(handlesCl, handlesSize, address->handle); + *(uint32_t*)getCPAptrFromOffset(relocCl->CPA, relocCl->nextFreeByteOffset) = clGetHandleIndex(handlesCl, handlesOffset, handlesSize, address->handle); 
relocCl->nextFreeByteOffset += 4; } diff --git a/driver/ControlListUtil.h b/driver/ControlListUtil.h index e9acf06..a9501a5 100644 --- a/driver/ControlListUtil.h +++ b/driver/ControlListUtil.h @@ -67,7 +67,7 @@ typedef struct ControlList uint32_t currMarkerOffset; } ControlList; -void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint32_t handlesSize, const ControlListAddress* address); +void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint32_t handlesOffset, uint32_t handlesSize, const ControlListAddress* address); void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address); #define __gen_user_data struct ControlList @@ -187,7 +187,8 @@ void clInsertGEMRelocations(ControlList* cl, void clInsertShaderRecord(ControlList* cls, ControlList* relocCl, ControlList* handlesCl, - uint8_t* handlesBuf, uint32_t handlesSize, + uint32_t handlesOffset, + uint32_t handlesSize, uint32_t fragmentShaderIsSingleThreaded, //0/1 uint32_t pointSizeIncludedInShadedVertexData, //0/1 uint32_t enableClipping, //0/1 @@ -208,13 +209,13 @@ void clInsertShaderRecord(ControlList* cls, void clInsertAttributeRecord(ControlList* cls, ControlList* relocCl, ControlList* handlesCl, - uint8_t* handlesBuf, uint32_t handlesSize, + uint32_t handlesOffset, uint32_t handlesSize, ControlListAddress address, uint32_t sizeBytes, uint32_t stride, uint32_t vertexVPMOffset, uint32_t coordinateVPMOffset); -uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handlesSize, uint32_t handle); +uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handlesOffset, uint32_t handlesSize, uint32_t handle); #if defined (__cplusplus) } diff --git a/driver/command.c b/driver/command.c index 4d59af2..c7b0b50 100644 --- a/driver/command.c +++ b/driver/command.c @@ -327,12 +327,12 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( uint32_t readMSAAdepthStencilImage = marker->readMSAAdepthStencilImage; //This should not result in an insertion! 
- uint32_t writeImageIdx = writeImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeImage->boundMem->bo) : 0;
- uint32_t readImageIdx = readImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, readImage->boundMem->bo) : 0;
- uint32_t writeDepthStencilImageIdx = writeDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeDepthStencilImage->boundMem->bo) : 0;
- uint32_t readDepthStencilImageIdx = readDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, readDepthStencilImage->boundMem->bo) : 0;
- uint32_t writeMSAAimageIdx = writeMSAAimage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeMSAAimage->boundMem->bo) : 0;
- uint32_t writeMSAAdepthStencilImageIdx = writeMSAAdepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo) : 0;
+ uint32_t writeImageIdx = writeImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset, marker->handlesSize, writeImage->boundMem->bo) : 0;
+ uint32_t readImageIdx = readImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset, marker->handlesSize, readImage->boundMem->bo) : 0;
+ uint32_t writeDepthStencilImageIdx = writeDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset, marker->handlesSize, writeDepthStencilImage->boundMem->bo) : 0;
+ uint32_t readDepthStencilImageIdx = readDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset, marker->handlesSize, readDepthStencilImage->boundMem->bo) : 0;
+ uint32_t writeMSAAimageIdx = writeMSAAimage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset, marker->handlesSize, writeMSAAimage->boundMem->bo) : 0;
+ uint32_t writeMSAAdepthStencilImageIdx = writeMSAAdepthStencilImage ? 
clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBufOffset, marker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo) : 0; // fprintf(stderr, "writeImage: %u\n", writeImage); // fprintf(stderr, "readImage: %u\n", readImage); diff --git a/driver/draw.c b/driver/draw.c index 75ceac9..afcabcd 100644 --- a/driver/draw.c +++ b/driver/draw.c @@ -225,7 +225,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) clInsertShaderRecord(&commandBuffer->shaderRecCl, &relocCl, &commandBuffer->handlesCl, - getCPAptrFromOffset(cb->handlesCl.CPA, currMarker->handlesBufOffset), + currMarker->handlesBufOffset, currMarker->handlesSize, !fragModule->hasThreadSwitch, 0, //TODO point size included in shaded vertex data? @@ -311,7 +311,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) clInsertAttributeRecord(&commandBuffer->shaderRecCl, &relocCl, &commandBuffer->handlesCl, - getCPAptrFromOffset(cb->handlesCl.CPA, currMarker->handlesBufOffset), + currMarker->handlesBufOffset, currMarker->handlesSize, vertexBuffer, //reloc address formatByteSize, @@ -350,7 +350,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) //emit reloc for texture BO clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, di->imageView->image->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesBufOffset, currMarker->handlesSize, di->imageView->image->boundMem->bo); //emit tex bo reloc index clFit(commandBuffer, &commandBuffer->uniformsCl, 4); @@ -369,7 +369,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) //emit reloc for BO clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, db->buffer->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, 
currMarker->handlesBufOffset, currMarker->handlesSize, db->buffer->boundMem->bo); //emit bo reloc index clFit(commandBuffer, &commandBuffer->uniformsCl, 4); @@ -386,7 +386,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) //emit reloc for BO clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, dtb->bufferView->buffer->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesBufOffset, currMarker->handlesSize, dtb->bufferView->buffer->boundMem->bo); //emit bo reloc index clFit(commandBuffer, &commandBuffer->uniformsCl, 4); @@ -592,7 +592,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdDrawIndexed( CLMarker* currMarker = getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset); clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, cb->indexBuffer->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesBufOffset, currMarker->handlesSize, cb->indexBuffer->boundMem->bo); clInsertGEMRelocations(&commandBuffer->binCl, idx, 0); diff --git a/driver/renderpass.c b/driver/renderpass.c index ef01159..133ab24 100644 --- a/driver/renderpass.c +++ b/driver/renderpass.c @@ -194,37 +194,37 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB if(writeImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesBufOffset, currMarker->handlesSize, writeImage->boundMem->bo); } if(readImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, readImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesBufOffset, 
currMarker->handlesSize, readImage->boundMem->bo); } if(writeDepthStencilImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeDepthStencilImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesBufOffset, currMarker->handlesSize, writeDepthStencilImage->boundMem->bo); } if(readDepthStencilImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, readDepthStencilImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesBufOffset, currMarker->handlesSize, readDepthStencilImage->boundMem->bo); } if(writeMSAAimage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeMSAAimage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesBufOffset, currMarker->handlesSize, writeMSAAimage->boundMem->bo); } if(writeMSAAdepthStencilImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesBufOffset, currMarker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo); } uint32_t bpp = 0; diff --git a/driver/stateChange.c b/driver/stateChange.c index 203acad..72aaeb2 100644 --- a/driver/stateChange.c +++ b/driver/stateChange.c @@ -518,7 +518,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdClearColorImage( //insert reloc for render target clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, i->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesBufOffset, currMarker->handlesSize, i->boundMem->bo); clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length); 
clInsertTileBinningModeConfiguration(&commandBuffer->binCl,