From 0ac879c29471821b59ce26e23852f74195dbe77f Mon Sep 17 00:00:00 2001 From: yours3lf <0.tamas.marton@gmail.com> Date: Sat, 16 May 2020 13:17:03 +0100 Subject: [PATCH] moved CPA from naked pointers to offsets as buffers can be reallocated --- driver/ConsecutivePoolAllocator.c | 44 ++-- driver/ConsecutivePoolAllocator.h | 5 +- driver/ControlListUtil.c | 377 ++++++++++++++---------------- driver/ControlListUtil.h | 28 ++- driver/command.c | 64 ++--- driver/common.c | 10 +- driver/descriptorSet.c | 12 +- driver/draw.c | 18 +- driver/renderpass.c | 71 +++--- driver/stateChange.c | 14 +- test/CPAtest/CPAtest.cpp | 67 ++++-- 11 files changed, 372 insertions(+), 338 deletions(-) diff --git a/driver/ConsecutivePoolAllocator.c b/driver/ConsecutivePoolAllocator.c index fdffa6e..3947a5d 100644 --- a/driver/ConsecutivePoolAllocator.c +++ b/driver/ConsecutivePoolAllocator.c @@ -44,7 +44,8 @@ void destroyConsecutivePoolAllocator(ConsecutivePoolAllocator* pa) } //allocate numBlocks consecutive memory -void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks) +//return an offset into the pool buffer, as pool could be reallocated! 
+uint32_t consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks) { assert(pa); assert(pa->buf); @@ -56,7 +57,7 @@ void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks) if(!ptr) { - return 0; //no free blocks + return -1; //no free blocks } for(; ptr; ptr = *ptr) @@ -89,7 +90,7 @@ void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks) //TODO debug stuff, not for release if(ptr) memset(ptr, 0, numBlocks * pa->blockSize); - return ptr; + return (char*)ptr - pa->buf; } //free numBlocks consecutive memory @@ -167,17 +168,13 @@ void consecutivePoolFree(ConsecutivePoolAllocator* pa, void* p, uint32_t numBloc } } -void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks) +uint32_t consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks) { assert(pa); assert(pa->buf); assert(currentMem); assert(currNumBlocks); - assert(0); - - //fprintf(stderr, "CPA realloc\n"); - uint32_t* nextCandidate = (char*)currentMem + pa->blockSize * currNumBlocks; uint32_t* prevPtr = 0; @@ -185,14 +182,18 @@ void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, { if(listPtr == nextCandidate) { - //if the free list contains an element that points right after our currentMem - //we can just use that one - *prevPtr = *listPtr; + //update next free block to be the one after our current candidate + if(prevPtr) + { + *prevPtr = *listPtr; + pa->nextFreeBlock = prevPtr; + } + else if(*listPtr) + { + pa->nextFreeBlock = *listPtr; + } - //TODO debug stuff, not for release - memset(nextCandidate, 0, pa->blockSize); - - return currentMem; + return (char*)currentMem - pa->buf; } prevPtr = listPtr; @@ -204,7 +205,7 @@ void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, if(!newMem) { - return 0; + return -1; } //copy over old content @@ -212,10 +213,19 @@ void* 
consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, //free current element consecutivePoolFree(pa, currentMem, currNumBlocks); - return newMem; + return (char*)newMem - pa->buf; } } +void* getCPAptrFromOffset(ConsecutivePoolAllocator* pa, uint32_t offset) +{ + assert(pa); + assert(pa->buf); + assert(offset <= pa->size - pa->blockSize); + + return pa->buf + offset; +} + void CPAdebugPrint(ConsecutivePoolAllocator* pa) { fprintf(stderr, "\nCPA Debug Print\n"); diff --git a/driver/ConsecutivePoolAllocator.h b/driver/ConsecutivePoolAllocator.h index 3712146..8cf03f6 100644 --- a/driver/ConsecutivePoolAllocator.h +++ b/driver/ConsecutivePoolAllocator.h @@ -18,10 +18,11 @@ typedef struct ConsecutivePoolAllocator ConsecutivePoolAllocator createConsecutivePoolAllocator(char* b, unsigned bs, unsigned s); void destroyConsecutivePoolAllocator(ConsecutivePoolAllocator* pa); -void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks); +uint32_t consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks); void consecutivePoolFree(ConsecutivePoolAllocator* pa, void* p, uint32_t numBlocks); -void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks); +uint32_t consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks); void CPAdebugPrint(ConsecutivePoolAllocator* pa); +void* getCPAptrFromOffset(ConsecutivePoolAllocator* pa, uint32_t offset); #if defined (__cplusplus) } diff --git a/driver/ControlListUtil.c b/driver/ControlListUtil.c index 6eac62e..1ec263f 100644 --- a/driver/ControlListUtil.c +++ b/driver/ControlListUtil.c @@ -1,4 +1,5 @@ #include "ControlListUtil.h" +#include "ConsecutivePoolAllocator.h" #include @@ -16,9 +17,8 @@ uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset) uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - uint32_t currSize = 
cl->nextFreeByte - cl->buffer; + assert(cl->CPA); + uint32_t currSize = cl->nextFreeByteOffset - cl->offset; if(currSize + size < cl->numBlocks * cl->blockSize - 4) { return 1; //fits! @@ -29,15 +29,16 @@ uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size) } } -void clInit(ControlList* cl, void* buffer, uint32_t blockSize) +void clInit(ControlList* cl, void* CPA, uint32_t offset, uint32_t blockSize) { assert(cl); - assert(buffer); - cl->buffer = buffer; + assert(CPA); + cl->offset = offset; cl->numBlocks = 1; cl->blockSize = blockSize; - cl->nextFreeByte = &cl->buffer[0]; - cl->currMarker = 0; + cl->nextFreeByteOffset = offset; + cl->currMarkerOffset = -1; + cl->CPA = CPA; } void clInsertNewCLMarker(ControlList* cl, @@ -48,175 +49,173 @@ void clInsertNewCLMarker(ControlList* cl, { //to be inserted when you'd insert tile binning mode config assert(cl); + assert(cl->CPA); assert(handlesCL); assert(shaderRecCL); assert(uniformsCL); CLMarker marker = {}; - marker.handlesBuf = handlesCL->buffer; - marker.shaderRecBuf = shaderRecCL->buffer; - marker.uniformsBuf = uniformsCL->buffer; + marker.handlesBufOffset = handlesCL->offset; + marker.shaderRecBufOffset = shaderRecCL->offset; + marker.uniformsBufOffset = uniformsCL->offset; + marker.nextMarkerOffset = -1; //close current marker - if(cl->currMarker && !cl->currMarker->size) + if(cl->currMarkerOffset != -1 && !((CLMarker*)getCPAptrFromOffset(cl->CPA, cl->currMarkerOffset))->size) { clCloseCurrentMarker(cl, handlesCL, shaderRecCL, shaderRecCount, uniformsCL); } //if this is not the first marker - if(cl->currMarker) + if(cl->currMarkerOffset != -1) { - marker.handlesBuf = cl->currMarker->handlesBuf + cl->currMarker->handlesSize; - marker.shaderRecBuf = cl->currMarker->shaderRecBuf + cl->currMarker->shaderRecSize; - marker.uniformsBuf = cl->currMarker->uniformsBuf + cl->currMarker->uniformsSize; - marker.shaderRecCount = cl->currMarker->shaderRecCount; //initialize with previous marker's data + CLMarker* currMarker = 
getCPAptrFromOffset(cl->CPA, cl->currMarkerOffset); + marker.handlesBufOffset = currMarker->handlesBufOffset + currMarker->handlesSize; + marker.shaderRecBufOffset = currMarker->shaderRecBufOffset + currMarker->shaderRecSize; + marker.uniformsBufOffset = currMarker->uniformsBufOffset + currMarker->uniformsSize; + marker.shaderRecCount = currMarker->shaderRecCount; //initialize with previous marker's data } - *(CLMarker*)cl->nextFreeByte = marker; - if(cl->currMarker) + *(CLMarker*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = marker; + if(cl->currMarkerOffset != -1) { - cl->currMarker->nextMarker = cl->nextFreeByte; + ((CLMarker*)getCPAptrFromOffset(cl->CPA, cl->currMarkerOffset))->nextMarkerOffset = cl->nextFreeByteOffset; } - cl->currMarker = cl->nextFreeByte; - cl->nextFreeByte += sizeof(CLMarker); + cl->currMarkerOffset = cl->nextFreeByteOffset; + cl->nextFreeByteOffset += sizeof(CLMarker); } void clCloseCurrentMarker(ControlList* cl, ControlList* handlesCL, ControlList* shaderRecCL, uint32_t shaderRecCount, ControlList* uniformsCL) { assert(cl); + assert(cl->CPA); assert(handlesCL); assert(shaderRecCL); assert(uniformsCL); - assert(cl->currMarker); - cl->currMarker->size = cl->nextFreeByte - ((uint8_t*)cl->currMarker + sizeof(CLMarker)); - cl->currMarker->handlesSize = handlesCL->nextFreeByte - cl->currMarker->handlesBuf; - cl->currMarker->shaderRecSize = shaderRecCL->nextFreeByte - cl->currMarker->shaderRecBuf; - cl->currMarker->uniformsSize = uniformsCL->nextFreeByte - cl->currMarker->uniformsBuf; - cl->currMarker->shaderRecCount = shaderRecCount - cl->currMarker->shaderRecCount; //update shader rec count to reflect added shader recs + CLMarker* currMarker = getCPAptrFromOffset(cl->CPA, cl->currMarkerOffset); + currMarker->size = cl->nextFreeByteOffset - (cl->currMarkerOffset + sizeof(CLMarker)); + currMarker->handlesSize = handlesCL->nextFreeByteOffset - currMarker->handlesBufOffset; + currMarker->shaderRecSize = shaderRecCL->nextFreeByteOffset - 
currMarker->shaderRecBufOffset; + currMarker->uniformsSize = uniformsCL->nextFreeByteOffset - currMarker->uniformsBufOffset; + currMarker->shaderRecCount = shaderRecCount - currMarker->shaderRecCount; //update shader rec count to reflect added shader recs } void clInsertData(ControlList* cl, uint32_t size, uint8_t* data) { assert(cl); - memcpy(cl->nextFreeByte, data, size); - cl->nextFreeByte += size; + assert(cl->CPA); + memcpy(getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset), data, size); + cl->nextFreeByteOffset += size; } void clInsertUniformConstant(ControlList* cl, uint32_t data) { assert(cl); - *(uint32_t*)cl->nextFreeByte = data; - cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = data; + cl->nextFreeByteOffset += 4; } void clInsertUniformXYScale(ControlList* cl, float data) { assert(cl); - *(float*)cl->nextFreeByte = data; - cl->nextFreeByte += 4; + assert(cl->CPA); + *(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = data; + cl->nextFreeByteOffset += 4; } void clInsertUniformZOffset(ControlList* cl, float data) { assert(cl); - *(float*)cl->nextFreeByte = data; - cl->nextFreeByte += 4; + assert(cl->CPA); + *(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = data; + cl->nextFreeByteOffset += 4; } void clInsertHalt(ControlList* cl) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_HALT_opcode; - cl->nextFreeByte++; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_HALT_opcode; + cl->nextFreeByteOffset++; } void clInsertNop(ControlList* cl) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_NOP_opcode; - cl->nextFreeByte++; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_NOP_opcode; + cl->nextFreeByteOffset++; } void clInsertFlush(ControlList* cl) { assert(cl); - assert(cl->buffer); - 
assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_FLUSH_opcode; - cl->nextFreeByte++; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_FLUSH_opcode; + cl->nextFreeByteOffset++; } void clInsertFlushAllState(ControlList* cl) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_FLUSH_ALL_STATE_opcode; - cl->nextFreeByte++; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_FLUSH_ALL_STATE_opcode; + cl->nextFreeByteOffset++; } void clInsertStartTileBinning(ControlList* cl) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_START_TILE_BINNING_opcode; - cl->nextFreeByte++; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_START_TILE_BINNING_opcode; + cl->nextFreeByteOffset++; } void clInsertIncrementSemaphore(ControlList* cl) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_INCREMENT_SEMAPHORE_opcode; - cl->nextFreeByte++; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_INCREMENT_SEMAPHORE_opcode; + cl->nextFreeByteOffset++; } void clInsertWaitOnSemaphore(ControlList* cl) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_WAIT_ON_SEMAPHORE_opcode; - cl->nextFreeByte++; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_WAIT_ON_SEMAPHORE_opcode; + cl->nextFreeByteOffset++; } //input: 2 cls (cl, handles cl) void clInsertBranch(ControlList* cls, ControlListAddress address) { assert(cls); - assert(cls->buffer); - assert(cls->nextFreeByte); - *cls->nextFreeByte = V3D21_BRANCH_opcode; cls->nextFreeByte++; + assert(cls->CPA); + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = V3D21_BRANCH_opcode; cls->nextFreeByteOffset++; //TODO is this correct? 
//clEmitShaderRelocation(cls, &address); - *(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4; + *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = address.offset; cls->nextFreeByteOffset += 4; } //input: 2 cls (cl, handles cl) void clInsertBranchToSubList(ControlList* cls, ControlListAddress address) { assert(cls); - assert(cls->buffer); - assert(cls->nextFreeByte); - *cls->nextFreeByte = V3D21_BRANCH_TO_SUB_LIST_opcode; cls->nextFreeByte++; + assert(cls->CPA); + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = V3D21_BRANCH_TO_SUB_LIST_opcode; cls->nextFreeByteOffset++; //TODO is this correct? //clEmitShaderRelocation(cls, &address); - *(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4; + *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = address.offset; cls->nextFreeByteOffset += 4; } void clInsertReturnFromSubList(ControlList* cl) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_RETURN_FROM_SUB_LIST_opcode; - cl->nextFreeByte++; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_RETURN_FROM_SUB_LIST_opcode; + cl->nextFreeByteOffset++; } /*void clInsertStoreMultiSampleResolvedTileColorBuffer(ControlList* cl) @@ -368,13 +367,12 @@ void clInsertIndexedPrimitiveList(ControlList* cl, enum V3D21_Primitive primitiveMode) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_INDEXED_PRIMITIVE_LIST_opcode; cl->nextFreeByte++; - *cl->nextFreeByte = moveBits(indexType, 4, 4) | moveBits(primitiveMode, 4, 0); cl->nextFreeByte++; - *(uint32_t*)cl->nextFreeByte = length; cl->nextFreeByte += 4; - *(uint32_t*)cl->nextFreeByte = indicesAddress; cl->nextFreeByte += 4; - *(uint32_t*)cl->nextFreeByte = maxIndex; cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_INDEXED_PRIMITIVE_LIST_opcode; 
cl->nextFreeByteOffset++; + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(indexType, 4, 4) | moveBits(primitiveMode, 4, 0); cl->nextFreeByteOffset++; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = length; cl->nextFreeByteOffset += 4; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = indicesAddress; cl->nextFreeByteOffset += 4; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = maxIndex; cl->nextFreeByteOffset += 4; } void clInsertVertexArrayPrimitives(ControlList* cl, @@ -383,11 +381,11 @@ void clInsertVertexArrayPrimitives(ControlList* cl, enum V3D21_Primitive primitiveMode) { assert(cl); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_VERTEX_ARRAY_PRIMITIVES_opcode; cl->nextFreeByte++; - *cl->nextFreeByte = moveBits(primitiveMode, 8, 0); cl->nextFreeByte++; - *(uint32_t*)cl->nextFreeByte = length; cl->nextFreeByte += 4; - *(uint32_t*)cl->nextFreeByte = firstVertexIndex; cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_VERTEX_ARRAY_PRIMITIVES_opcode; cl->nextFreeByteOffset++; + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(primitiveMode, 8, 0); cl->nextFreeByteOffset++; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = length; cl->nextFreeByteOffset += 4; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = firstVertexIndex; cl->nextFreeByteOffset += 4; } /*void clInsertPrimitiveListFormat(ControlList* cl, @@ -407,12 +405,12 @@ void clInsertShaderState(ControlList* cl, uint32_t numberOfAttributeArrays) { assert(cl); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_GL_SHADER_STATE_opcode; cl->nextFreeByte++; - *(uint32_t*)cl->nextFreeByte = + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_GL_SHADER_STATE_opcode; cl->nextFreeByteOffset++; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, 
cl->nextFreeByteOffset) = moveBits(address, 28, 4) | moveBits(extendedShaderRecord, 1, 3) | - moveBits(numberOfAttributeArrays, 3, 0); cl->nextFreeByte += 4; + moveBits(numberOfAttributeArrays, 3, 0); cl->nextFreeByteOffset += 4; } /* @@ -447,10 +445,9 @@ void clInsertConfigurationBits(ControlList* cl, uint32_t enableForwardFacingPrimitive) //0/1 { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_CONFIGURATION_BITS_opcode; cl->nextFreeByte++; - *(uint32_t*)cl->nextFreeByte = + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_CONFIGURATION_BITS_opcode; cl->nextFreeByteOffset++; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(enableForwardFacingPrimitive, 1, 0) | moveBits(enableReverseFacingPrimitive, 1, 1) | moveBits(clockwisePrimitives, 1, 2) | @@ -463,47 +460,43 @@ void clInsertConfigurationBits(ControlList* cl, moveBits(depthTestFunction, 3, 12) | moveBits(zUpdatesEnable, 1, 15) | moveBits(earlyZEnable, 1, 16) | - moveBits(earlyZUpdatesEnable, 1, 17); cl->nextFreeByte += 3; + moveBits(earlyZUpdatesEnable, 1, 17); cl->nextFreeByteOffset += 3; } void clInsertFlatShadeFlags(ControlList* cl, uint32_t flags) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_FLAT_SHADE_FLAGS_opcode; cl->nextFreeByte++; - *(uint32_t*)cl->nextFreeByte = flags; cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_FLAT_SHADE_FLAGS_opcode; cl->nextFreeByteOffset++; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = flags; cl->nextFreeByteOffset += 4; } void clInsertPointSize(ControlList* cl, float size) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_POINT_SIZE_opcode; cl->nextFreeByte++; - *(float*)cl->nextFreeByte = size; cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, 
cl->nextFreeByteOffset) = V3D21_POINT_SIZE_opcode; cl->nextFreeByteOffset++; + *(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = size; cl->nextFreeByteOffset += 4; } void clInsertLineWidth(ControlList* cl, float width) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_LINE_WIDTH_opcode; cl->nextFreeByte++; - *(float*)cl->nextFreeByte = width; cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_LINE_WIDTH_opcode; cl->nextFreeByteOffset++; + *(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = width; cl->nextFreeByteOffset += 4; } void clInsertRHTXBoundary(ControlList* cl, uint32_t boundary) //sint16 { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_RHT_X_BOUNDARY_opcode; cl->nextFreeByte++; - *(uint16_t*)cl->nextFreeByte = moveBits(boundary, 16, 0); cl->nextFreeByte += 2; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_RHT_X_BOUNDARY_opcode; cl->nextFreeByteOffset++; + *(uint16_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(boundary, 16, 0); cl->nextFreeByteOffset += 2; } uint32_t f32_to_f187(float f32) @@ -517,9 +510,9 @@ void clInsertDepthOffset(ControlList* cl, float factor) { assert(cl); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_DEPTH_OFFSET_opcode; cl->nextFreeByte++; - *(uint32_t*)cl->nextFreeByte = moveBits(f32_to_f187(factor), 16, 0) | moveBits(f32_to_f187(units), 16, 16); cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_DEPTH_OFFSET_opcode; cl->nextFreeByteOffset++; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(f32_to_f187(factor), 16, 0) | moveBits(f32_to_f187(units), 16, 16); cl->nextFreeByteOffset += 4; } void clInsertClipWindow(ControlList* cl, @@ -529,11 +522,10 @@ void clInsertClipWindow(ControlList* cl, 
uint32_t leftPixelCoord) //uint16 { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_CLIP_WINDOW_opcode; cl->nextFreeByte++; - *(uint32_t*)cl->nextFreeByte = moveBits(leftPixelCoord, 16, 0) | moveBits(bottomPixelCoord, 16, 16); cl->nextFreeByte += 4; - *(uint32_t*)cl->nextFreeByte = moveBits(width, 16, 0) | moveBits(height, 16, 16); cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_CLIP_WINDOW_opcode; cl->nextFreeByteOffset++; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(leftPixelCoord, 16, 0) | moveBits(bottomPixelCoord, 16, 16); cl->nextFreeByteOffset += 4; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(width, 16, 0) | moveBits(height, 16, 16); cl->nextFreeByteOffset += 4; } uint16_t get16bitSignedFixedNumber(float x) @@ -549,12 +541,11 @@ void clInsertViewPortOffset(ControlList* cl, ) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_VIEWPORT_OFFSET_opcode; cl->nextFreeByte++; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_VIEWPORT_OFFSET_opcode; cl->nextFreeByteOffset++; //expects 16 bit signed fixed point number with 4 fractional bits - *(uint16_t*)cl->nextFreeByte = get16bitSignedFixedNumber(x); cl->nextFreeByte += 2; - *(uint16_t*)cl->nextFreeByte = get16bitSignedFixedNumber(y); cl->nextFreeByte += 2; + *(uint16_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = get16bitSignedFixedNumber(x); cl->nextFreeByteOffset += 2; + *(uint16_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = get16bitSignedFixedNumber(y); cl->nextFreeByteOffset += 2; } void clInsertZMinMaxClippingPlanes(ControlList* cl, @@ -563,10 +554,10 @@ void clInsertZMinMaxClippingPlanes(ControlList* cl, ) { assert(cl); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_Z_MIN_AND_MAX_CLIPPING_PLANES_opcode; 
cl->nextFreeByte++; - *(float*)cl->nextFreeByte = minZw; cl->nextFreeByte += 4; - *(float*)cl->nextFreeByte = maxZw; cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_Z_MIN_AND_MAX_CLIPPING_PLANES_opcode; cl->nextFreeByteOffset++; + *(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = minZw; cl->nextFreeByteOffset += 4; + *(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = maxZw; cl->nextFreeByteOffset += 4; } void clInsertClipperXYScaling(ControlList* cl, @@ -575,11 +566,10 @@ void clInsertClipperXYScaling(ControlList* cl, ) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_CLIPPER_XY_SCALING_opcode; cl->nextFreeByte++; - *(float*)cl->nextFreeByte = width; cl->nextFreeByte += 4; - *(float*)cl->nextFreeByte = height; cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_CLIPPER_XY_SCALING_opcode; cl->nextFreeByteOffset++; + *(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = width; cl->nextFreeByteOffset += 4; + *(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = height; cl->nextFreeByteOffset += 4; } void clInsertClipperZScaleOffset(ControlList* cl, @@ -588,11 +578,10 @@ void clInsertClipperZScaleOffset(ControlList* cl, ) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_CLIPPER_Z_SCALE_AND_OFFSET_opcode; cl->nextFreeByte++; - *(float*)cl->nextFreeByte = zScale; cl->nextFreeByte += 4; - *(float*)cl->nextFreeByte = zOffset; cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_CLIPPER_Z_SCALE_AND_OFFSET_opcode; cl->nextFreeByteOffset++; + *(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = zScale; cl->nextFreeByteOffset += 4; + *(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = zOffset; cl->nextFreeByteOffset += 4; } void 
clInsertTileBinningModeConfiguration(ControlList* cl, @@ -610,12 +599,11 @@ void clInsertTileBinningModeConfiguration(ControlList* cl, ) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_TILE_BINNING_MODE_CONFIGURATION_opcode; cl->nextFreeByte++; - *(uint32_t*)cl->nextFreeByte = tileAllocationMemoryAddress; cl->nextFreeByte += 4; - *(uint32_t*)cl->nextFreeByte = tileAllocationMemorySize; cl->nextFreeByte += 4; - *(uint32_t*)cl->nextFreeByte = tileStateDataArrayAddress; cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_TILE_BINNING_MODE_CONFIGURATION_opcode; cl->nextFreeByteOffset++; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = tileAllocationMemoryAddress; cl->nextFreeByteOffset += 4; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = tileAllocationMemorySize; cl->nextFreeByteOffset += 4; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = tileStateDataArrayAddress; cl->nextFreeByteOffset += 4; uint32_t tileSizeW = 64; uint32_t tileSizeH = 64; @@ -632,15 +620,15 @@ void clInsertTileBinningModeConfiguration(ControlList* cl, uint32_t widthInTiles = divRoundUp(widthInPixels, tileSizeW); uint32_t heightInTiles = divRoundUp(heightInPixels, tileSizeH); - *(uint8_t*)cl->nextFreeByte = widthInTiles; cl->nextFreeByte++; - *(uint8_t*)cl->nextFreeByte = heightInTiles; cl->nextFreeByte++; - *cl->nextFreeByte = + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = widthInTiles; cl->nextFreeByteOffset++; + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = heightInTiles; cl->nextFreeByteOffset++; + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(multisampleMode4x, 1, 0) | moveBits(tileBuffer64BitColorDepth, 1, 1) | moveBits(autoInitializeTileStateDataArray, 1, 2) | moveBits(tileAllocationInitialBlockSize, 2, 3) | moveBits(tileAllocationBlockSize, 2, 5) | - 
moveBits(doubleBufferInNonMsMode, 1, 7); cl->nextFreeByte++; + moveBits(doubleBufferInNonMsMode, 1, 7); cl->nextFreeByteOffset++; } /* @@ -698,11 +686,10 @@ void clInsertGEMRelocations(ControlList* cl, uint32_t buffer1) { assert(cl); - assert(cl->buffer); - assert(cl->nextFreeByte); - *cl->nextFreeByte = V3D21_GEM_RELOCATIONS_opcode; cl->nextFreeByte++; - *(uint32_t*)cl->nextFreeByte = buffer0; cl->nextFreeByte += 4; - *(uint32_t*)cl->nextFreeByte = buffer1; cl->nextFreeByte += 4; + assert(cl->CPA); + *(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_GEM_RELOCATIONS_opcode; cl->nextFreeByteOffset++; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = buffer0; cl->nextFreeByteOffset += 4; + *(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = buffer1; cl->nextFreeByteOffset += 4; } //input: 2 cls (cl, handles cl) @@ -729,34 +716,33 @@ void clInsertShaderRecord(ControlList* cls, ControlListAddress coordinateCodeAddress) { assert(cls); - assert(cls->buffer); - assert(cls->nextFreeByte); - *cls->nextFreeByte = + assert(cls->CPA); + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = moveBits(fragmentShaderIsSingleThreaded, 1, 0) | moveBits(pointSizeIncludedInShadedVertexData, 1, 1) | - moveBits(enableClipping, 1, 2); cls->nextFreeByte++; - *cls->nextFreeByte = 0; cls->nextFreeByte++; - *(uint16_t*)cls->nextFreeByte = moveBits(fragmentNumberOfUsedUniforms, 16, 0); cls->nextFreeByte++; - *cls->nextFreeByte |= fragmentNumberOfVaryings; cls->nextFreeByte++; - clEmitShaderRelocation(relocCl, handlesCl, handlesBuf, handlesSize, &fragmentCodeAddress); - *(uint32_t*)cls->nextFreeByte = fragmentCodeAddress.offset; cls->nextFreeByte += 4; - *(uint32_t*)cls->nextFreeByte = fragmentUniformsAddress; cls->nextFreeByte += 4; + moveBits(enableClipping, 1, 2); cls->nextFreeByteOffset++; + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = 0; cls->nextFreeByteOffset++; + 
*(uint16_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = moveBits(fragmentNumberOfUsedUniforms, 16, 0); cls->nextFreeByteOffset++; + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) |= fragmentNumberOfVaryings; cls->nextFreeByteOffset++; + clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &fragmentCodeAddress); + *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = fragmentCodeAddress.offset; cls->nextFreeByteOffset += 4; + *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = fragmentUniformsAddress; cls->nextFreeByteOffset += 4; - *(uint16_t*)cls->nextFreeByte = moveBits(vertexNumberOfUsedUniforms, 16, 0); cls->nextFreeByte += 2; - *cls->nextFreeByte = vertexAttributeArraySelectBits; cls->nextFreeByte++; - *cls->nextFreeByte = vertexTotalAttributesSize; cls->nextFreeByte++; - clEmitShaderRelocation(relocCl, handlesCl, handlesBuf, handlesSize, &vertexCodeAddress); + *(uint16_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = moveBits(vertexNumberOfUsedUniforms, 16, 0); cls->nextFreeByteOffset += 2; + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = vertexAttributeArraySelectBits; cls->nextFreeByteOffset++; + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = vertexTotalAttributesSize; cls->nextFreeByteOffset++; + clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &vertexCodeAddress); //wtf??? 
--> shader code will always have an offset of 0 so this is fine uint32_t offset = moveBits(vertexCodeAddress.offset, 32, 0) | moveBits(vertexUniformsAddress, 32, 0); - *(uint32_t*)cls->nextFreeByte = offset; cls->nextFreeByte += 4; - cls->nextFreeByte += 4; + *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = offset; cls->nextFreeByteOffset += 4; + cls->nextFreeByteOffset += 4; - *(uint16_t*)cls->nextFreeByte = moveBits(coordinateNumberOfUsedUniforms, 16, 0); cls->nextFreeByte += 2; - *cls->nextFreeByte = coordinateAttributeArraySelectBits; cls->nextFreeByte++; - *cls->nextFreeByte = coordinateTotalAttributesSize; cls->nextFreeByte++; - clEmitShaderRelocation(relocCl, handlesCl, handlesBuf, handlesSize, &coordinateCodeAddress); - *(uint32_t*)cls->nextFreeByte = coordinateCodeAddress.offset; cls->nextFreeByte += 4; - *(uint32_t*)cls->nextFreeByte = coordinateUniformsAddress; cls->nextFreeByte += 4; + *(uint16_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = moveBits(coordinateNumberOfUsedUniforms, 16, 0); cls->nextFreeByteOffset += 2; + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateAttributeArraySelectBits; cls->nextFreeByteOffset++; + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateTotalAttributesSize; cls->nextFreeByteOffset++; + clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &coordinateCodeAddress); + *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateCodeAddress.offset; cls->nextFreeByteOffset += 4; + *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateUniformsAddress; cls->nextFreeByteOffset += 4; } //input: 2 cls (cl, handles cl) @@ -771,27 +757,26 @@ void clInsertAttributeRecord(ControlList* cls, uint32_t coordinateVPMOffset) { assert(cls); - assert(cls->buffer); - assert(cls->nextFreeByte); + assert(cls->CPA); uint32_t sizeBytesMinusOne = sizeBytes - 1; - clEmitShaderRelocation(relocCl, handlesCl, 
handlesBuf, handlesSize, &address); - *(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4; - *cls->nextFreeByte = sizeBytesMinusOne; cls->nextFreeByte++; - *cls->nextFreeByte = stride; cls->nextFreeByte++; - *cls->nextFreeByte = vertexVPMOffset; cls->nextFreeByte++; - *cls->nextFreeByte = coordinateVPMOffset; cls->nextFreeByte++; + clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &address); + *(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = address.offset; cls->nextFreeByteOffset += 4; + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = sizeBytesMinusOne; cls->nextFreeByteOffset++; + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = stride; cls->nextFreeByteOffset++; + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = vertexVPMOffset; cls->nextFreeByteOffset++; + *(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateVPMOffset; cls->nextFreeByteOffset++; } -uint32_t clGetHandleIndex(ControlList* handlesCl, uint8_t* handlesBuf, uint32_t handlesSize, uint32_t handle) +uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handlesSize, uint32_t handle) { uint32_t c = 0; //if curr marker is closed already we need to work with the stored size - uint32_t numHandles = (handlesSize ? handlesSize : (handlesCl->nextFreeByte - handlesBuf)) / 4; + uint32_t numHandles = (handlesSize ? 
handlesSize : (handlesCl->nextFreeByteOffset - handlesCl->offset)) / 4; for(; c < numHandles; ++c) { - if(((uint32_t*)handlesBuf)[c] == handle) + if(((uint32_t*)getCPAptrFromOffset(handlesCl->CPA, handlesCl->offset))[c] == handle) { //found return c; @@ -799,27 +784,25 @@ uint32_t clGetHandleIndex(ControlList* handlesCl, uint8_t* handlesBuf, uint32_t } //write handle to handles cl - *(uint32_t*)handlesCl->nextFreeByte = handle; - handlesCl->nextFreeByte += 4; + *(uint32_t*)getCPAptrFromOffset(handlesCl->CPA, handlesCl->nextFreeByteOffset) = handle; + handlesCl->nextFreeByteOffset += 4; return c; } //input: 2 cls (cl + handles cl) -inline void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint8_t* handlesBuf, uint32_t handlesSize, const ControlListAddress* address) +inline void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint32_t handlesSize, const ControlListAddress* address) { assert(relocCl); - assert(relocCl->buffer); - assert(relocCl->nextFreeByte); + assert(relocCl->CPA); assert(handlesCl); - assert(handlesCl->buffer); - assert(handlesCl->nextFreeByte); + assert(handlesCl->CPA); assert(address); assert(address->handle); //store offset within handles in cl - *(uint32_t*)relocCl->nextFreeByte = clGetHandleIndex(handlesCl, handlesBuf, handlesSize, address->handle); - relocCl->nextFreeByte += 4; + *(uint32_t*)getCPAptrFromOffset(relocCl->CPA, relocCl->nextFreeByteOffset) = clGetHandleIndex(handlesCl, handlesSize, address->handle); + relocCl->nextFreeByteOffset += 4; } inline void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address) diff --git a/driver/ControlListUtil.h b/driver/ControlListUtil.h index 07d8434..e9acf06 100644 --- a/driver/ControlListUtil.h +++ b/driver/ControlListUtil.h @@ -15,7 +15,8 @@ typedef struct ControlListAddress typedef struct CLMarker { //current binning cl buf position is this struct in the CL plus sizeof(this struct) - struct CLMarker* nextMarker; //TODO change to 
offset, could be reallocated + //struct CLMarker* nextMarker; // + uint32_t nextMarkerOffset; uint32_t size; //in bytes void* writeImage; //_image* to render to void* readImage; @@ -41,25 +42,32 @@ typedef struct CLMarker //pointers that point to where all the other CL data is //plus sizes - uint8_t* handlesBuf; //TODO change to offset, could be reallocated + //uint8_t* handlesBuf; // + uint32_t handlesBufOffset; uint32_t handlesSize; - uint8_t* shaderRecBuf; //TODO change to offset, could be reallocated + //uint8_t* shaderRecBuf; // + uint32_t shaderRecBufOffset; uint32_t shaderRecSize; uint32_t shaderRecCount; - uint8_t* uniformsBuf; //TODO change to offset, could be reallocated + //uint8_t* uniformsBuf; // + uint32_t uniformsBufOffset; uint32_t uniformsSize; } CLMarker; typedef struct ControlList { - uint8_t* buffer; + void* CPA; + //uint8_t* buffer; + uint32_t offset; //offset into CPA buf uint32_t numBlocks; uint32_t blockSize; - uint8_t* nextFreeByte; //pointer to the next available free byte - CLMarker* currMarker; //TODO change to offset, could be reallocated + //uint8_t* nextFreeByte; //pointer to the next available free byte + uint32_t nextFreeByteOffset; //pointer to the next available free byte + //CLMarker* currMarker; + uint32_t currMarkerOffset; } ControlList; -void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint8_t* handlesBuf, uint32_t handlesSize, const ControlListAddress* address); +void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint32_t handlesSize, const ControlListAddress* address); void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address); #define __gen_user_data struct ControlList @@ -74,7 +82,7 @@ void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address); uint32_t divRoundUp(uint32_t n, uint32_t d); uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset); uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size); -void clInit(ControlList* 
cl, void* buffer, uint32_t blockSize); +void clInit(ControlList* cl, void* CPA, uint32_t offset, uint32_t blockSize); void clInsertNewCLMarker(ControlList* cl, ControlList* handlesCL, ControlList* shaderRecCL, @@ -206,7 +214,7 @@ void clInsertAttributeRecord(ControlList* cls, uint32_t stride, uint32_t vertexVPMOffset, uint32_t coordinateVPMOffset); -uint32_t clGetHandleIndex(ControlList* handlesCl, uint8_t* handlesBuf, uint32_t handlesSize, uint32_t handle); +uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handlesSize, uint32_t handle); #if defined (__cplusplus) } diff --git a/driver/command.c b/driver/command.c index 6e13055..4d59af2 100644 --- a/driver/command.c +++ b/driver/command.c @@ -121,10 +121,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkAllocateCommandBuffers( pCommandBuffers[c]->usageFlags = 0; pCommandBuffers[c]->state = CMDBUF_STATE_INITIAL; pCommandBuffers[c]->cp = cp; - clInit(&pCommandBuffers[c]->binCl, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize); - clInit(&pCommandBuffers[c]->handlesCl, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize); - clInit(&pCommandBuffers[c]->shaderRecCl, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize); - clInit(&pCommandBuffers[c]->uniformsCl, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize); + clInit(&pCommandBuffers[c]->binCl, &cp->cpa, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize); + clInit(&pCommandBuffers[c]->handlesCl, &cp->cpa, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize); + clInit(&pCommandBuffers[c]->shaderRecCl, &cp->cpa, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize); + clInit(&pCommandBuffers[c]->uniformsCl, &cp->cpa, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize); pCommandBuffers[c]->graphicsPipeline = 0; pCommandBuffers[c]->computePipeline = 0; @@ -151,25 +151,25 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkAllocateCommandBuffers( pCommandBuffers[c]->perfmonID = 0; - if(!pCommandBuffers[c]->binCl.buffer) + 
if(pCommandBuffers[c]->binCl.offset == -1) { res = VK_ERROR_OUT_OF_HOST_MEMORY; break; } - if(!pCommandBuffers[c]->handlesCl.buffer) + if(pCommandBuffers[c]->handlesCl.offset == -1) { res = VK_ERROR_OUT_OF_HOST_MEMORY; break; } - if(!pCommandBuffers[c]->shaderRecCl.buffer) + if(pCommandBuffers[c]->shaderRecCl.offset == -1) { res = VK_ERROR_OUT_OF_HOST_MEMORY; break; } - if(!pCommandBuffers[c]->uniformsCl.buffer) + if(pCommandBuffers[c]->uniformsCl.offset == -1) { res = VK_ERROR_OUT_OF_HOST_MEMORY; break; @@ -183,10 +183,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkAllocateCommandBuffers( { for(int c = 0; c < pAllocateInfo->commandBufferCount; ++c) { - consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->binCl.buffer, pCommandBuffers[c]->binCl.numBlocks); - consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->handlesCl.buffer, pCommandBuffers[c]->handlesCl.numBlocks); - consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->shaderRecCl.buffer, pCommandBuffers[c]->shaderRecCl.numBlocks); - consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->uniformsCl.buffer, pCommandBuffers[c]->uniformsCl.numBlocks); + consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->binCl.offset), pCommandBuffers[c]->binCl.numBlocks); + consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->handlesCl.offset), pCommandBuffers[c]->handlesCl.numBlocks); + consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->shaderRecCl.offset), pCommandBuffers[c]->shaderRecCl.numBlocks); + consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->uniformsCl.offset), pCommandBuffers[c]->uniformsCl.numBlocks); poolFree(&cp->pa, pCommandBuffers[c]); pCommandBuffers[c] = 0; } @@ -290,14 +290,14 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( { VkCommandBuffer cmdbuf = pSubmits->pCommandBuffers[c]; - if(!cmdbuf->binCl.currMarker) + if(cmdbuf->binCl.currMarkerOffset == -1) { //no markers recorded yet, skip continue; } //first 
entry is assumed to be a marker - CLMarker* marker = cmdbuf->binCl.buffer; + CLMarker* marker = getCPAptrFromOffset(cmdbuf->binCl.CPA, cmdbuf->binCl.offset); //a command buffer may contain multiple render passes //and commands outside render passes such as clear commands @@ -327,12 +327,12 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( uint32_t readMSAAdepthStencilImage = marker->readMSAAdepthStencilImage; //This should not result in an insertion! - uint32_t writeImageIdx = writeImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeImage->boundMem->bo) : 0; - uint32_t readImageIdx = readImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, readImage->boundMem->bo) : 0; - uint32_t writeDepthStencilImageIdx = writeDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeDepthStencilImage->boundMem->bo) : 0; - uint32_t readDepthStencilImageIdx = readDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, readDepthStencilImage->boundMem->bo) : 0; - uint32_t writeMSAAimageIdx = writeMSAAimage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeMSAAimage->boundMem->bo) : 0; - uint32_t writeMSAAdepthStencilImageIdx = writeMSAAdepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo) : 0; + uint32_t writeImageIdx = writeImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeImage->boundMem->bo) : 0; + uint32_t readImageIdx = readImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, readImage->boundMem->bo) : 0; + uint32_t writeDepthStencilImageIdx = writeDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeDepthStencilImage->boundMem->bo) : 0; + uint32_t readDepthStencilImageIdx = readDepthStencilImage ? 
clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, readDepthStencilImage->boundMem->bo) : 0; + uint32_t writeMSAAimageIdx = writeMSAAimage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeMSAAimage->boundMem->bo) : 0; + uint32_t writeMSAAdepthStencilImageIdx = writeMSAAdepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo) : 0; // fprintf(stderr, "writeImage: %u\n", writeImage); // fprintf(stderr, "readImage: %u\n", readImage); @@ -510,10 +510,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( submitCl.height = height; submitCl.flags |= marker->flags; - submitCl.bo_handles = marker->handlesBuf; + submitCl.bo_handles = getCPAptrFromOffset(cmdbuf->handlesCl.CPA, marker->handlesBufOffset); submitCl.bin_cl = ((uint8_t*)marker) + sizeof(CLMarker); - submitCl.shader_rec = marker->shaderRecBuf; - submitCl.uniforms = marker->uniformsBuf; + submitCl.shader_rec = getCPAptrFromOffset(cmdbuf->shaderRecCl.CPA, marker->shaderRecBufOffset); + submitCl.uniforms = getCPAptrFromOffset(cmdbuf->uniformsCl.CPA, marker->uniformsBufOffset); if(marker->perfmonID) { @@ -653,7 +653,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( } //advance in linked list - marker = marker->nextMarker; + marker = marker->nextMarkerOffset == -1 ? 
0 : getCPAptrFromOffset(cmdbuf->binCl.CPA, marker->nextMarkerOffset); } } @@ -706,10 +706,10 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkFreeCommandBuffers( { if(pCommandBuffers[c]) { - consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->binCl.buffer, pCommandBuffers[c]->binCl.numBlocks); - consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->handlesCl.buffer, pCommandBuffers[c]->handlesCl.numBlocks); - consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->shaderRecCl.buffer, pCommandBuffers[c]->shaderRecCl.numBlocks); - consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->uniformsCl.buffer, pCommandBuffers[c]->uniformsCl.numBlocks); + consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->binCl.offset), pCommandBuffers[c]->binCl.numBlocks); + consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->handlesCl.offset), pCommandBuffers[c]->handlesCl.numBlocks); + consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->shaderRecCl.offset), pCommandBuffers[c]->shaderRecCl.numBlocks); + consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->uniformsCl.offset), pCommandBuffers[c]->uniformsCl.numBlocks); poolFree(&cp->pa, pCommandBuffers[c]); } } @@ -830,10 +830,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkResetCommandBuffer( //reset commandbuffer state commandBuffer->shaderRecCount = 0; - clInit(&commandBuffer->binCl, commandBuffer->binCl.buffer, commandBuffer->cp->cpa.blockSize); - clInit(&commandBuffer->handlesCl, commandBuffer->handlesCl.buffer, commandBuffer->cp->cpa.blockSize); - clInit(&commandBuffer->shaderRecCl, commandBuffer->shaderRecCl.buffer, commandBuffer->cp->cpa.blockSize); - clInit(&commandBuffer->uniformsCl, commandBuffer->uniformsCl.buffer, commandBuffer->cp->cpa.blockSize); + clInit(&commandBuffer->binCl, &commandBuffer->cp->cpa, commandBuffer->binCl.offset, commandBuffer->cp->cpa.blockSize); + clInit(&commandBuffer->handlesCl, &commandBuffer->cp->cpa, 
commandBuffer->handlesCl.offset, commandBuffer->cp->cpa.blockSize); + clInit(&commandBuffer->shaderRecCl, &commandBuffer->cp->cpa, commandBuffer->shaderRecCl.offset, commandBuffer->cp->cpa.blockSize); + clInit(&commandBuffer->uniformsCl, &commandBuffer->cp->cpa, commandBuffer->uniformsCl.offset, commandBuffer->cp->cpa.blockSize); commandBuffer->graphicsPipeline = 0; commandBuffer->computePipeline = 0; diff --git a/driver/common.c b/driver/common.c index 05e93c1..7265413 100644 --- a/driver/common.c +++ b/driver/common.c @@ -533,12 +533,12 @@ void clFit(VkCommandBuffer cb, ControlList* cl, uint32_t commandSize) { if(!clHasEnoughSpace(cl, commandSize)) { - uint32_t currSize = cl->nextFreeByte - cl->buffer; - uint32_t currMarkerOffset = (uint8_t*)cl->currMarker - cl->buffer; - cl->buffer = consecutivePoolReAllocate(&cb->cp->cpa, cl->buffer, cl->numBlocks); assert(cl->buffer); - cl->nextFreeByte = cl->buffer + currSize; + uint32_t currSize = cl->nextFreeByteOffset - cl->offset; + uint32_t currMarkerOffset = cl->currMarkerOffset - cl->offset; + cl->offset = consecutivePoolReAllocate(&cb->cp->cpa, getCPAptrFromOffset(cl->CPA, cl->offset), cl->numBlocks); assert(cl->offset != -1); + cl->nextFreeByteOffset = cl->offset + currSize; cl->numBlocks++; - cl->currMarker = cl->buffer + currMarkerOffset; + cl->currMarkerOffset = cl->offset + currMarkerOffset; } } diff --git a/driver/descriptorSet.c b/driver/descriptorSet.c index e5809de..a070615 100644 --- a/driver/descriptorSet.c +++ b/driver/descriptorSet.c @@ -162,20 +162,20 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkAllocateDescriptorSets( if(imageDescriptorCount > 0) { - ds->imageDescriptors = consecutivePoolAllocate(&dp->imageDescriptorCPA, imageDescriptorCount); - ds->imageBindingMap = createMap(consecutivePoolAllocate(&dp->mapElementCPA, imageDescriptorCount), imageDescriptorCount); + ds->imageDescriptors = getCPAptrFromOffset(&dp->imageDescriptorCPA, consecutivePoolAllocate(&dp->imageDescriptorCPA, imageDescriptorCount)); + 
ds->imageBindingMap = createMap(getCPAptrFromOffset(&dp->mapElementCPA, consecutivePoolAllocate(&dp->mapElementCPA, imageDescriptorCount)), imageDescriptorCount); } if(bufferDescriptorCount > 0) { - ds->bufferDescriptors = consecutivePoolAllocate(&dp->bufferDescriptorCPA, bufferDescriptorCount); - ds->bufferBindingMap = createMap(consecutivePoolAllocate(&dp->mapElementCPA, bufferDescriptorCount), bufferDescriptorCount); + ds->bufferDescriptors = getCPAptrFromOffset(&dp->bufferDescriptorCPA, consecutivePoolAllocate(&dp->bufferDescriptorCPA, bufferDescriptorCount)); + ds->bufferBindingMap = createMap(getCPAptrFromOffset(&dp->mapElementCPA, consecutivePoolAllocate(&dp->mapElementCPA, bufferDescriptorCount)), bufferDescriptorCount); } if(texelBufferDescriptorCount > 0) { - ds->texelBufferDescriptors = consecutivePoolAllocate(&dp->texelBufferDescriptorCPA, texelBufferDescriptorCount); - ds->texelBufferBindingMap = createMap(consecutivePoolAllocate(&dp->mapElementCPA, texelBufferDescriptorCount), texelBufferDescriptorCount); + ds->texelBufferDescriptors = getCPAptrFromOffset(&dp->texelBufferDescriptorCPA, consecutivePoolAllocate(&dp->texelBufferDescriptorCPA, texelBufferDescriptorCount)); + ds->texelBufferBindingMap = createMap(getCPAptrFromOffset(&dp->mapElementCPA, consecutivePoolAllocate(&dp->mapElementCPA, texelBufferDescriptorCount)), texelBufferDescriptorCount); } //TODO immutable samplers diff --git a/driver/draw.c b/driver/draw.c index 6feb28c..75ceac9 100644 --- a/driver/draw.c +++ b/driver/draw.c @@ -8,6 +8,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) assert(commandBuffer); _commandBuffer* cb = commandBuffer; + CLMarker* currMarker = getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset); //TODO handle cases when submitting >65k vertices in a VBO //TODO HW-2116 workaround @@ -224,8 +225,8 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) 
clInsertShaderRecord(&commandBuffer->shaderRecCl, &relocCl, &commandBuffer->handlesCl, - cb->binCl.currMarker->handlesBuf, - cb->binCl.currMarker->handlesSize, + getCPAptrFromOffset(cb->handlesCl.CPA, currMarker->handlesBufOffset), + currMarker->handlesSize, !fragModule->hasThreadSwitch, 0, //TODO point size included in shaded vertex data? 1, //enable clipping @@ -310,8 +311,8 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) clInsertAttributeRecord(&commandBuffer->shaderRecCl, &relocCl, &commandBuffer->handlesCl, - cb->binCl.currMarker->handlesBuf, - cb->binCl.currMarker->handlesSize, + getCPAptrFromOffset(cb->handlesCl.CPA, currMarker->handlesBufOffset), + currMarker->handlesSize, vertexBuffer, //reloc address formatByteSize, stride, @@ -349,7 +350,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) //emit reloc for texture BO clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, di->imageView->image->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, di->imageView->image->boundMem->bo); //emit tex bo reloc index clFit(commandBuffer, &commandBuffer->uniformsCl, 4); @@ -368,7 +369,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) //emit reloc for BO clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, db->buffer->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, db->buffer->boundMem->bo); //emit bo reloc index clFit(commandBuffer, &commandBuffer->uniformsCl, 4); @@ -385,7 +386,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) //emit reloc for BO clFit(commandBuffer, 
&commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, dtb->bufferView->buffer->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, dtb->bufferView->buffer->boundMem->bo); //emit bo reloc index clFit(commandBuffer, &commandBuffer->uniformsCl, 4); @@ -588,9 +589,10 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdDrawIndexed( uint32_t maxIndex = drawCommon(commandBuffer, vertexOffset); _commandBuffer* cb = commandBuffer; + CLMarker* currMarker = getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset); clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, cb->indexBuffer->boundMem->bo); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, cb->indexBuffer->boundMem->bo); clInsertGEMRelocations(&commandBuffer->binCl, idx, 0); diff --git a/driver/renderpass.c b/driver/renderpass.c index 1f31513..ef01159 100644 --- a/driver/renderpass.c +++ b/driver/renderpass.c @@ -127,21 +127,22 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB clFit(commandBuffer, &commandBuffer->binCl, sizeof(CLMarker)); clInsertNewCLMarker(&commandBuffer->binCl, &cb->handlesCl, &cb->shaderRecCl, cb->shaderRecCount, &cb->uniformsCl); - commandBuffer->binCl.currMarker->writeImage = writeImage; - commandBuffer->binCl.currMarker->writeImageOffset = writeImageOffset; - commandBuffer->binCl.currMarker->readImage = readImage; - commandBuffer->binCl.currMarker->readImageOffset = readImageOffset; - commandBuffer->binCl.currMarker->writeDepthStencilImage = writeDepthStencilImage; - commandBuffer->binCl.currMarker->writeDepthStencilImageOffset = writeDepthStencilImageOffset; - commandBuffer->binCl.currMarker->readDepthStencilImage = readDepthStencilImage; - 
commandBuffer->binCl.currMarker->readDepthStencilImageOffset = readDepthStencilImageOffset; - commandBuffer->binCl.currMarker->writeMSAAimage = writeMSAAimage; - commandBuffer->binCl.currMarker->writeMSAAimageOffset = writeMSAAimageOffset; - commandBuffer->binCl.currMarker->writeMSAAdepthStencilImage = writeMSAAdepthStencilImage; - commandBuffer->binCl.currMarker->writeMSAAdepthStencilImageOffset = writeMSAAdepthStencilImageOffset; - commandBuffer->binCl.currMarker->performResolve = performResolve; - commandBuffer->binCl.currMarker->readMSAAimage = readMSAAimage; - commandBuffer->binCl.currMarker->readMSAAdepthStencilImage = readMSAAdepthStencilImage; + CLMarker* currMarker = getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset); + currMarker->writeImage = writeImage; + currMarker->writeImageOffset = writeImageOffset; + currMarker->readImage = readImage; + currMarker->readImageOffset = readImageOffset; + currMarker->writeDepthStencilImage = writeDepthStencilImage; + currMarker->writeDepthStencilImageOffset = writeDepthStencilImageOffset; + currMarker->readDepthStencilImage = readDepthStencilImage; + currMarker->readDepthStencilImageOffset = readDepthStencilImageOffset; + currMarker->writeMSAAimage = writeMSAAimage; + currMarker->writeMSAAimageOffset = writeMSAAimageOffset; + currMarker->writeMSAAdepthStencilImage = writeMSAAdepthStencilImage; + currMarker->writeMSAAdepthStencilImageOffset = writeMSAAdepthStencilImageOffset; + currMarker->performResolve = performResolve; + currMarker->readMSAAimage = readMSAAimage; + currMarker->readMSAAdepthStencilImage = readMSAAdepthStencilImage; if(rp->subpasses[0].colorAttachmentCount > 0) { @@ -153,14 +154,14 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB if(!rp->subpasses[0].pResolveAttachments) { - cb->binCl.currMarker->clearColor[0] = - cb->binCl.currMarker->clearColor[1] = + currMarker->clearColor[0] = + currMarker->clearColor[1] = 
packVec4IntoABGR8(pRenderPassBegin->pClearValues[rp->subpasses[0].pColorAttachments[0].attachment].color.float32); } else { - cb->binCl.currMarker->clearColor[0] = - cb->binCl.currMarker->clearColor[1] = + currMarker->clearColor[0] = + currMarker->clearColor[1] = packVec4IntoABGR8(pRenderPassBegin->pClearValues[rp->subpasses[0].pColorAttachments[0].attachment].color.float32); } } @@ -173,7 +174,7 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB { flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; - cb->binCl.currMarker->clearDepth = + currMarker->clearDepth = (uint32_t)(pRenderPassBegin->pClearValues[rp->subpasses[0].pDepthStencilAttachment->attachment].depthStencil.depth * 0xffffff) & 0xffffff; } @@ -181,55 +182,55 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB { flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; - cb->binCl.currMarker->clearStencil = + currMarker->clearStencil = pRenderPassBegin->pClearValues[rp->subpasses[0].pDepthStencilAttachment->attachment].depthStencil.stencil & 0xff; } } - cb->binCl.currMarker->flags = flags; + currMarker->flags = flags; //insert relocs if(writeImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeImage->boundMem->bo); } if(readImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, readImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, readImage->boundMem->bo); } if(writeDepthStencilImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, 
commandBuffer->binCl.currMarker->handlesSize, writeDepthStencilImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeDepthStencilImage->boundMem->bo); } if(readDepthStencilImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, readDepthStencilImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, readDepthStencilImage->boundMem->bo); } if(writeMSAAimage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeMSAAimage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeMSAAimage->boundMem->bo); } if(writeMSAAdepthStencilImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo); } uint32_t bpp = 0; - cb->binCl.currMarker->width = fb->width; - cb->binCl.currMarker->height = fb->height; + currMarker->width = fb->width; + currMarker->height = fb->height; if(writeImage) { @@ -247,11 +248,11 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB } //pad render size if we are rendering to a mip level - cb->binCl.currMarker->renderToMip = biggestMip > 0; + currMarker->renderToMip = biggestMip > 0; - uint32_t width = cb->binCl.currMarker->width; + uint32_t width = currMarker->width; - if(cb->binCl.currMarker->renderToMip) + if(currMarker->renderToMip) { width = getPow2Pad(width); width = width < 4 ? 
4 : width; @@ -265,7 +266,7 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB 0, //auto initialize tile state data array bpp == 64, //64 bit color mode writeMSAAimage || writeMSAAdepthStencilImage || performResolve ? 1 : 0, //msaa - width, cb->binCl.currMarker->height, + width, currMarker->height, 0, //tile state data array address 0, //tile allocation memory size 0); //tile allocation memory address @@ -277,7 +278,7 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length); clInsertStartTileBinning(&commandBuffer->binCl); - cb->binCl.currMarker->perfmonID = cb->perfmonID; + currMarker->perfmonID = cb->perfmonID; cb->currRenderPass = rp; } diff --git a/driver/stateChange.c b/driver/stateChange.c index fe94e4e..203acad 100644 --- a/driver/stateChange.c +++ b/driver/stateChange.c @@ -512,11 +512,13 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdClearColorImage( { //Simplest case: just submit a job to clear the image clFit(commandBuffer, &commandBuffer->binCl, sizeof(CLMarker)); clInsertNewCLMarker(&commandBuffer->binCl, &commandBuffer->handlesCl, &commandBuffer->shaderRecCl, commandBuffer->shaderRecCount, &commandBuffer->uniformsCl); - commandBuffer->binCl.currMarker->writeImage = i; + + CLMarker* currMarker = getCPAptrFromOffset(commandBuffer->binCl.CPA, commandBuffer->binCl.currMarkerOffset); + currMarker->writeImage = i; //insert reloc for render target clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, i->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, i->boundMem->bo); clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length); clInsertTileBinningModeConfiguration(&commandBuffer->binCl, @@ -548,11 +550,11 @@ VKAPI_ATTR void 
VKAPI_CALL rpi_vkCmdClearColorImage( clFit(commandBuffer, &commandBuffer->binCl, V3D21_FLUSH_length); clInsertFlush(&commandBuffer->binCl); - commandBuffer->binCl.currMarker->clearColor[0] = commandBuffer->binCl.currMarker->clearColor[1] = packVec4IntoABGR8(pColor->float32); - commandBuffer->binCl.currMarker->flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; + currMarker->clearColor[0] = currMarker->clearColor[1] = packVec4IntoABGR8(pColor->float32); + currMarker->flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; - commandBuffer->binCl.currMarker->width = i->width; - commandBuffer->binCl.currMarker->height = i->height; + currMarker->width = i->width; + currMarker->height = i->height; } } diff --git a/test/CPAtest/CPAtest.cpp b/test/CPAtest/CPAtest.cpp index 8f2816a..56ef711 100644 --- a/test/CPAtest/CPAtest.cpp +++ b/test/CPAtest/CPAtest.cpp @@ -18,37 +18,37 @@ void simpleTest() ConsecutivePoolAllocator cpa = createConsecutivePoolAllocator((char*)malloc(size), blocksize, size); CPAdebugPrint(&cpa); - void* mem1 = consecutivePoolAllocate(&cpa, 1); + uint32_t mem1 = consecutivePoolAllocate(&cpa, 1); CPAdebugPrint(&cpa); - void* mem2 = consecutivePoolAllocate(&cpa, 2); + uint32_t mem2 = consecutivePoolAllocate(&cpa, 2); CPAdebugPrint(&cpa); - void* mem3 = consecutivePoolAllocate(&cpa, 3); + uint32_t mem3 = consecutivePoolAllocate(&cpa, 3); CPAdebugPrint(&cpa); - void* mem11 = consecutivePoolAllocate(&cpa, 1); + uint32_t mem11 = consecutivePoolAllocate(&cpa, 1); CPAdebugPrint(&cpa); - void* mem111 = consecutivePoolAllocate(&cpa, 1); + uint32_t mem111 = consecutivePoolAllocate(&cpa, 1); CPAdebugPrint(&cpa); - void* mem0 = consecutivePoolAllocate(&cpa, 1); + uint32_t mem0 = consecutivePoolAllocate(&cpa, 1); fprintf(stderr, "\n%p\n", mem0); - consecutivePoolFree(&cpa, mem11, 1); + consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem11), 1); CPAdebugPrint(&cpa); - consecutivePoolFree(&cpa, mem111, 1); + consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem111), 1); CPAdebugPrint(&cpa); - 
consecutivePoolFree(&cpa, mem2, 2); + consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem2), 2); CPAdebugPrint(&cpa); - consecutivePoolFree(&cpa, mem3, 3); + consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem3), 3); CPAdebugPrint(&cpa); - consecutivePoolFree(&cpa, mem1, 1); + consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem1), 1); CPAdebugPrint(&cpa); } @@ -61,7 +61,7 @@ void allocTest(uint32_t numToAlloc) ConsecutivePoolAllocator cpa = createConsecutivePoolAllocator((char*)malloc(size), blocksize, size); //CPAdebugPrint(&cpa); - void* mem1 = consecutivePoolAllocate(&cpa, numToAlloc); + uint32_t mem1 = consecutivePoolAllocate(&cpa, numToAlloc); CPAdebugPrint(&cpa); fprintf(stderr, "\nmem %p\n", mem1); @@ -76,25 +76,52 @@ void freeOneTest(uint32_t which) ConsecutivePoolAllocator cpa = createConsecutivePoolAllocator((char*)malloc(size), blocksize, size); //CPAdebugPrint(&cpa); - void* mem[8]; + uint32_t mem[8]; for(uint32_t c = 0; c < 8; ++c) { mem[c] = consecutivePoolAllocate(&cpa, 1); } - consecutivePoolFree(&cpa, mem[which], 1); + consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem[which]), 1); CPAdebugPrint(&cpa); //fprintf(stderr, "\nmem %p\n", mem); } -int main() { - //simpleTest(); +void reallocTest() +{ + uint32_t blocksize = 16; + uint32_t numblocks = 3; + uint32_t size = numblocks * blocksize; - allocTest(1); - allocTest(3); - allocTest(8); - allocTest(9); + ConsecutivePoolAllocator cpa = createConsecutivePoolAllocator((char*)malloc(size), blocksize, size); + CPAdebugPrint(&cpa); + + uint32_t mem1 = consecutivePoolAllocate(&cpa, 1); + CPAdebugPrint(&cpa); + + uint32_t mem2 = consecutivePoolAllocate(&cpa, 1); + CPAdebugPrint(&cpa); + + consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem1), 1); + CPAdebugPrint(&cpa); + + mem2 = consecutivePoolReAllocate(&cpa, getCPAptrFromOffset(&cpa, mem2), 1); + CPAdebugPrint(&cpa); + + uint32_t mem0 = consecutivePoolAllocate(&cpa, 1); + fprintf(stderr, "\n%u\n", (unsigned)mem0); +} + +int main() { +// 
simpleTest(); + + reallocTest(); + +// allocTest(1); +// allocTest(3); +// allocTest(8); +// allocTest(9); return 0; }