1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-01-30 22:52:14 +01:00

Moved CPA (ConsecutivePoolAllocator) from raw pointers to offsets

The pool buffers can be reallocated, which would invalidate raw pointers handed out earlier.
This commit is contained in:
yours3lf 2020-05-16 13:17:03 +01:00
parent 1abeee4d14
commit 0ac879c294
11 changed files with 372 additions and 338 deletions

View File

@ -44,7 +44,8 @@ void destroyConsecutivePoolAllocator(ConsecutivePoolAllocator* pa)
}
//allocate numBlocks consecutive memory
void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks)
//return an offset into the pool buffer, as pool could be reallocated!
uint32_t consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks)
{
assert(pa);
assert(pa->buf);
@ -56,7 +57,7 @@ void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks)
if(!ptr)
{
return 0; //no free blocks
return -1; //no free blocks
}
for(; ptr; ptr = *ptr)
@ -89,7 +90,7 @@ void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks)
//TODO debug stuff, not for release
if(ptr) memset(ptr, 0, numBlocks * pa->blockSize);
return ptr;
return (char*)ptr - pa->buf;
}
//free numBlocks consecutive memory
@ -167,17 +168,13 @@ void consecutivePoolFree(ConsecutivePoolAllocator* pa, void* p, uint32_t numBloc
}
}
void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks)
uint32_t consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks)
{
assert(pa);
assert(pa->buf);
assert(currentMem);
assert(currNumBlocks);
assert(0);
//fprintf(stderr, "CPA realloc\n");
uint32_t* nextCandidate = (char*)currentMem + pa->blockSize * currNumBlocks;
uint32_t* prevPtr = 0;
@ -185,14 +182,18 @@ void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem,
{
if(listPtr == nextCandidate)
{
//if the free list contains an element that points right after our currentMem
//we can just use that one
*prevPtr = *listPtr;
//update next free block to be the one after our current candidate
if(prevPtr)
{
*prevPtr = *listPtr;
pa->nextFreeBlock = prevPtr;
}
else if(*listPtr)
{
pa->nextFreeBlock = *listPtr;
}
//TODO debug stuff, not for release
memset(nextCandidate, 0, pa->blockSize);
return currentMem;
return (char*)currentMem - pa->buf;
}
prevPtr = listPtr;
@ -204,7 +205,7 @@ void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem,
if(!newMem)
{
return 0;
return -1;
}
//copy over old content
@ -212,10 +213,19 @@ void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem,
//free current element
consecutivePoolFree(pa, currentMem, currNumBlocks);
return newMem;
return (char*)newMem - pa->buf;
}
}
//translate an offset returned by the allocator back into a pointer
//pa: allocator whose buffer the offset refers to
//offset: byte offset measured from the start of pa->buf
//returns pa->buf + offset
//the pointer should be re-derived on each use rather than stored, as the pool could be reallocated
void* getCPAptrFromOffset(ConsecutivePoolAllocator* pa, uint32_t offset)
{
	assert(pa);
	assert(pa->buf);
	//offsets are byte granular (control lists pass their nextFreeByteOffset here),
	//so bound against the end of the buffer rather than the start of the last block,
	//this also rejects the -1 "no free blocks" value returned by the allocate functions
	//NOTE(review): assumes pa->size is the buffer size in bytes -- confirm against the struct definition
	assert(offset < pa->size);
	return pa->buf + offset;
}
void CPAdebugPrint(ConsecutivePoolAllocator* pa)
{
fprintf(stderr, "\nCPA Debug Print\n");

View File

@ -18,10 +18,11 @@ typedef struct ConsecutivePoolAllocator
ConsecutivePoolAllocator createConsecutivePoolAllocator(char* b, unsigned bs, unsigned s);
void destroyConsecutivePoolAllocator(ConsecutivePoolAllocator* pa);
void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks);
uint32_t consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks);
void consecutivePoolFree(ConsecutivePoolAllocator* pa, void* p, uint32_t numBlocks);
void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks);
uint32_t consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks);
void CPAdebugPrint(ConsecutivePoolAllocator* pa);
void* getCPAptrFromOffset(ConsecutivePoolAllocator* pa, uint32_t offset);
#if defined (__cplusplus)
}

View File

@ -1,4 +1,5 @@
#include "ControlListUtil.h"
#include "ConsecutivePoolAllocator.h"
#include <stdint.h>
@ -16,9 +17,8 @@ uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset)
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
uint32_t currSize = cl->nextFreeByte - cl->buffer;
assert(cl->CPA);
uint32_t currSize = cl->nextFreeByteOffset - cl->offset;
if(currSize + size < cl->numBlocks * cl->blockSize - 4)
{
return 1; //fits!
@ -29,15 +29,16 @@ uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size)
}
}
void clInit(ControlList* cl, void* buffer, uint32_t blockSize)
void clInit(ControlList* cl, void* CPA, uint32_t offset, uint32_t blockSize)
{
assert(cl);
assert(buffer);
cl->buffer = buffer;
assert(CPA);
cl->offset = offset;
cl->numBlocks = 1;
cl->blockSize = blockSize;
cl->nextFreeByte = &cl->buffer[0];
cl->currMarker = 0;
cl->nextFreeByteOffset = offset;
cl->currMarkerOffset = -1;
cl->CPA = CPA;
}
void clInsertNewCLMarker(ControlList* cl,
@ -48,175 +49,173 @@ void clInsertNewCLMarker(ControlList* cl,
{
//to be inserted when you'd insert tile binning mode config
assert(cl);
assert(cl->CPA);
assert(handlesCL);
assert(shaderRecCL);
assert(uniformsCL);
CLMarker marker = {};
marker.handlesBuf = handlesCL->buffer;
marker.shaderRecBuf = shaderRecCL->buffer;
marker.uniformsBuf = uniformsCL->buffer;
marker.handlesBufOffset = handlesCL->offset;
marker.shaderRecBufOffset = shaderRecCL->offset;
marker.uniformsBufOffset = uniformsCL->offset;
marker.nextMarkerOffset = -1;
//close current marker
if(cl->currMarker && !cl->currMarker->size)
if(cl->currMarkerOffset != -1 && !((CLMarker*)getCPAptrFromOffset(cl->CPA, cl->currMarkerOffset))->size)
{
clCloseCurrentMarker(cl, handlesCL, shaderRecCL, shaderRecCount, uniformsCL);
}
//if this is not the first marker
if(cl->currMarker)
if(cl->currMarkerOffset != -1)
{
marker.handlesBuf = cl->currMarker->handlesBuf + cl->currMarker->handlesSize;
marker.shaderRecBuf = cl->currMarker->shaderRecBuf + cl->currMarker->shaderRecSize;
marker.uniformsBuf = cl->currMarker->uniformsBuf + cl->currMarker->uniformsSize;
marker.shaderRecCount = cl->currMarker->shaderRecCount; //initialize with previous marker's data
CLMarker* currMarker = getCPAptrFromOffset(cl->CPA, cl->currMarkerOffset);
marker.handlesBufOffset = currMarker->handlesBufOffset + currMarker->handlesSize;
marker.shaderRecBufOffset = currMarker->shaderRecBufOffset + currMarker->shaderRecSize;
marker.uniformsBufOffset = currMarker->uniformsBufOffset + currMarker->uniformsSize;
marker.shaderRecCount = currMarker->shaderRecCount; //initialize with previous marker's data
}
*(CLMarker*)cl->nextFreeByte = marker;
if(cl->currMarker)
*(CLMarker*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = marker;
if(cl->currMarkerOffset != -1)
{
cl->currMarker->nextMarker = cl->nextFreeByte;
((CLMarker*)getCPAptrFromOffset(cl->CPA, cl->currMarkerOffset))->nextMarkerOffset = cl->nextFreeByteOffset;
}
cl->currMarker = cl->nextFreeByte;
cl->nextFreeByte += sizeof(CLMarker);
cl->currMarkerOffset = cl->nextFreeByteOffset;
cl->nextFreeByteOffset += sizeof(CLMarker);
}
void clCloseCurrentMarker(ControlList* cl, ControlList* handlesCL, ControlList* shaderRecCL, uint32_t shaderRecCount, ControlList* uniformsCL)
{
assert(cl);
assert(cl->CPA);
assert(handlesCL);
assert(shaderRecCL);
assert(uniformsCL);
assert(cl->currMarker);
cl->currMarker->size = cl->nextFreeByte - ((uint8_t*)cl->currMarker + sizeof(CLMarker));
cl->currMarker->handlesSize = handlesCL->nextFreeByte - cl->currMarker->handlesBuf;
cl->currMarker->shaderRecSize = shaderRecCL->nextFreeByte - cl->currMarker->shaderRecBuf;
cl->currMarker->uniformsSize = uniformsCL->nextFreeByte - cl->currMarker->uniformsBuf;
cl->currMarker->shaderRecCount = shaderRecCount - cl->currMarker->shaderRecCount; //update shader rec count to reflect added shader recs
CLMarker* currMarker = getCPAptrFromOffset(cl->CPA, cl->currMarkerOffset);
currMarker->size = cl->nextFreeByteOffset - (cl->currMarkerOffset + sizeof(CLMarker));
currMarker->handlesSize = handlesCL->nextFreeByteOffset - currMarker->handlesBufOffset;
currMarker->shaderRecSize = shaderRecCL->nextFreeByteOffset - currMarker->shaderRecBufOffset;
currMarker->uniformsSize = uniformsCL->nextFreeByteOffset - currMarker->uniformsBufOffset;
currMarker->shaderRecCount = shaderRecCount - currMarker->shaderRecCount; //update shader rec count to reflect added shader recs
}
void clInsertData(ControlList* cl, uint32_t size, uint8_t* data)
{
assert(cl);
memcpy(cl->nextFreeByte, data, size);
cl->nextFreeByte += size;
assert(cl->CPA);
memcpy(getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset), data, size);
cl->nextFreeByteOffset += size;
}
void clInsertUniformConstant(ControlList* cl, uint32_t data)
{
assert(cl);
*(uint32_t*)cl->nextFreeByte = data;
cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = data;
cl->nextFreeByteOffset += 4;
}
void clInsertUniformXYScale(ControlList* cl, float data)
{
assert(cl);
*(float*)cl->nextFreeByte = data;
cl->nextFreeByte += 4;
assert(cl->CPA);
*(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = data;
cl->nextFreeByteOffset += 4;
}
void clInsertUniformZOffset(ControlList* cl, float data)
{
assert(cl);
*(float*)cl->nextFreeByte = data;
cl->nextFreeByte += 4;
assert(cl->CPA);
*(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = data;
cl->nextFreeByteOffset += 4;
}
void clInsertHalt(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_HALT_opcode;
cl->nextFreeByte++;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_HALT_opcode;
cl->nextFreeByteOffset++;
}
void clInsertNop(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_NOP_opcode;
cl->nextFreeByte++;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_NOP_opcode;
cl->nextFreeByteOffset++;
}
void clInsertFlush(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLUSH_opcode;
cl->nextFreeByte++;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_FLUSH_opcode;
cl->nextFreeByteOffset++;
}
void clInsertFlushAllState(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLUSH_ALL_STATE_opcode;
cl->nextFreeByte++;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_FLUSH_ALL_STATE_opcode;
cl->nextFreeByteOffset++;
}
void clInsertStartTileBinning(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_START_TILE_BINNING_opcode;
cl->nextFreeByte++;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_START_TILE_BINNING_opcode;
cl->nextFreeByteOffset++;
}
void clInsertIncrementSemaphore(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_INCREMENT_SEMAPHORE_opcode;
cl->nextFreeByte++;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_INCREMENT_SEMAPHORE_opcode;
cl->nextFreeByteOffset++;
}
void clInsertWaitOnSemaphore(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_WAIT_ON_SEMAPHORE_opcode;
cl->nextFreeByte++;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_WAIT_ON_SEMAPHORE_opcode;
cl->nextFreeByteOffset++;
}
//input: 2 cls (cl, handles cl)
void clInsertBranch(ControlList* cls, ControlListAddress address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_BRANCH_opcode; cls->nextFreeByte++;
assert(cls->CPA);
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = V3D21_BRANCH_opcode; cls->nextFreeByteOffset++;
//TODO is this correct?
//clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
*(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = address.offset; cls->nextFreeByteOffset += 4;
}
//input: 2 cls (cl, handles cl)
void clInsertBranchToSubList(ControlList* cls, ControlListAddress address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_BRANCH_TO_SUB_LIST_opcode; cls->nextFreeByte++;
assert(cls->CPA);
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = V3D21_BRANCH_TO_SUB_LIST_opcode; cls->nextFreeByteOffset++;
//TODO is this correct?
//clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
*(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = address.offset; cls->nextFreeByteOffset += 4;
}
void clInsertReturnFromSubList(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_RETURN_FROM_SUB_LIST_opcode;
cl->nextFreeByte++;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_RETURN_FROM_SUB_LIST_opcode;
cl->nextFreeByteOffset++;
}
/*void clInsertStoreMultiSampleResolvedTileColorBuffer(ControlList* cl)
@ -368,13 +367,12 @@ void clInsertIndexedPrimitiveList(ControlList* cl,
enum V3D21_Primitive primitiveMode)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_INDEXED_PRIMITIVE_LIST_opcode; cl->nextFreeByte++;
*cl->nextFreeByte = moveBits(indexType, 4, 4) | moveBits(primitiveMode, 4, 0); cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = length; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = indicesAddress; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = maxIndex; cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_INDEXED_PRIMITIVE_LIST_opcode; cl->nextFreeByteOffset++;
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(indexType, 4, 4) | moveBits(primitiveMode, 4, 0); cl->nextFreeByteOffset++;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = length; cl->nextFreeByteOffset += 4;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = indicesAddress; cl->nextFreeByteOffset += 4;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = maxIndex; cl->nextFreeByteOffset += 4;
}
void clInsertVertexArrayPrimitives(ControlList* cl,
@ -383,11 +381,11 @@ void clInsertVertexArrayPrimitives(ControlList* cl,
enum V3D21_Primitive primitiveMode)
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_VERTEX_ARRAY_PRIMITIVES_opcode; cl->nextFreeByte++;
*cl->nextFreeByte = moveBits(primitiveMode, 8, 0); cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = length; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = firstVertexIndex; cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_VERTEX_ARRAY_PRIMITIVES_opcode; cl->nextFreeByteOffset++;
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(primitiveMode, 8, 0); cl->nextFreeByteOffset++;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = length; cl->nextFreeByteOffset += 4;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = firstVertexIndex; cl->nextFreeByteOffset += 4;
}
/*void clInsertPrimitiveListFormat(ControlList* cl,
@ -407,12 +405,12 @@ void clInsertShaderState(ControlList* cl,
uint32_t numberOfAttributeArrays)
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_GL_SHADER_STATE_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte =
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_GL_SHADER_STATE_opcode; cl->nextFreeByteOffset++;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) =
moveBits(address, 28, 4) |
moveBits(extendedShaderRecord, 1, 3) |
moveBits(numberOfAttributeArrays, 3, 0); cl->nextFreeByte += 4;
moveBits(numberOfAttributeArrays, 3, 0); cl->nextFreeByteOffset += 4;
}
/*
@ -447,10 +445,9 @@ void clInsertConfigurationBits(ControlList* cl,
uint32_t enableForwardFacingPrimitive) //0/1
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CONFIGURATION_BITS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte =
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_CONFIGURATION_BITS_opcode; cl->nextFreeByteOffset++;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) =
moveBits(enableForwardFacingPrimitive, 1, 0) |
moveBits(enableReverseFacingPrimitive, 1, 1) |
moveBits(clockwisePrimitives, 1, 2) |
@ -463,47 +460,43 @@ void clInsertConfigurationBits(ControlList* cl,
moveBits(depthTestFunction, 3, 12) |
moveBits(zUpdatesEnable, 1, 15) |
moveBits(earlyZEnable, 1, 16) |
moveBits(earlyZUpdatesEnable, 1, 17); cl->nextFreeByte += 3;
moveBits(earlyZUpdatesEnable, 1, 17); cl->nextFreeByteOffset += 3;
}
void clInsertFlatShadeFlags(ControlList* cl,
uint32_t flags)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLAT_SHADE_FLAGS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = flags; cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_FLAT_SHADE_FLAGS_opcode; cl->nextFreeByteOffset++;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = flags; cl->nextFreeByteOffset += 4;
}
void clInsertPointSize(ControlList* cl,
float size)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_POINT_SIZE_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = size; cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_POINT_SIZE_opcode; cl->nextFreeByteOffset++;
*(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = size; cl->nextFreeByteOffset += 4;
}
void clInsertLineWidth(ControlList* cl,
float width)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_LINE_WIDTH_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = width; cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_LINE_WIDTH_opcode; cl->nextFreeByteOffset++;
*(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = width; cl->nextFreeByteOffset += 4;
}
void clInsertRHTXBoundary(ControlList* cl,
uint32_t boundary) //sint16
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_RHT_X_BOUNDARY_opcode; cl->nextFreeByte++;
*(uint16_t*)cl->nextFreeByte = moveBits(boundary, 16, 0); cl->nextFreeByte += 2;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_RHT_X_BOUNDARY_opcode; cl->nextFreeByteOffset++;
*(uint16_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(boundary, 16, 0); cl->nextFreeByteOffset += 2;
}
uint32_t f32_to_f187(float f32)
@ -517,9 +510,9 @@ void clInsertDepthOffset(ControlList* cl,
float factor)
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_DEPTH_OFFSET_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = moveBits(f32_to_f187(factor), 16, 0) | moveBits(f32_to_f187(units), 16, 16); cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_DEPTH_OFFSET_opcode; cl->nextFreeByteOffset++;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(f32_to_f187(factor), 16, 0) | moveBits(f32_to_f187(units), 16, 16); cl->nextFreeByteOffset += 4;
}
void clInsertClipWindow(ControlList* cl,
@ -529,11 +522,10 @@ void clInsertClipWindow(ControlList* cl,
uint32_t leftPixelCoord) //uint16
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIP_WINDOW_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = moveBits(leftPixelCoord, 16, 0) | moveBits(bottomPixelCoord, 16, 16); cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = moveBits(width, 16, 0) | moveBits(height, 16, 16); cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_CLIP_WINDOW_opcode; cl->nextFreeByteOffset++;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(leftPixelCoord, 16, 0) | moveBits(bottomPixelCoord, 16, 16); cl->nextFreeByteOffset += 4;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = moveBits(width, 16, 0) | moveBits(height, 16, 16); cl->nextFreeByteOffset += 4;
}
uint16_t get16bitSignedFixedNumber(float x)
@ -549,12 +541,11 @@ void clInsertViewPortOffset(ControlList* cl,
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_VIEWPORT_OFFSET_opcode; cl->nextFreeByte++;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_VIEWPORT_OFFSET_opcode; cl->nextFreeByteOffset++;
//expects 16 bit signed fixed point number with 4 fractional bits
*(uint16_t*)cl->nextFreeByte = get16bitSignedFixedNumber(x); cl->nextFreeByte += 2;
*(uint16_t*)cl->nextFreeByte = get16bitSignedFixedNumber(y); cl->nextFreeByte += 2;
*(uint16_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = get16bitSignedFixedNumber(x); cl->nextFreeByteOffset += 2;
*(uint16_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = get16bitSignedFixedNumber(y); cl->nextFreeByteOffset += 2;
}
void clInsertZMinMaxClippingPlanes(ControlList* cl,
@ -563,10 +554,10 @@ void clInsertZMinMaxClippingPlanes(ControlList* cl,
)
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_Z_MIN_AND_MAX_CLIPPING_PLANES_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = minZw; cl->nextFreeByte += 4;
*(float*)cl->nextFreeByte = maxZw; cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_Z_MIN_AND_MAX_CLIPPING_PLANES_opcode; cl->nextFreeByteOffset++;
*(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = minZw; cl->nextFreeByteOffset += 4;
*(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = maxZw; cl->nextFreeByteOffset += 4;
}
void clInsertClipperXYScaling(ControlList* cl,
@ -575,11 +566,10 @@ void clInsertClipperXYScaling(ControlList* cl,
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIPPER_XY_SCALING_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = width; cl->nextFreeByte += 4;
*(float*)cl->nextFreeByte = height; cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_CLIPPER_XY_SCALING_opcode; cl->nextFreeByteOffset++;
*(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = width; cl->nextFreeByteOffset += 4;
*(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = height; cl->nextFreeByteOffset += 4;
}
void clInsertClipperZScaleOffset(ControlList* cl,
@ -588,11 +578,10 @@ void clInsertClipperZScaleOffset(ControlList* cl,
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIPPER_Z_SCALE_AND_OFFSET_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = zScale; cl->nextFreeByte += 4;
*(float*)cl->nextFreeByte = zOffset; cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_CLIPPER_Z_SCALE_AND_OFFSET_opcode; cl->nextFreeByteOffset++;
*(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = zScale; cl->nextFreeByteOffset += 4;
*(float*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = zOffset; cl->nextFreeByteOffset += 4;
}
void clInsertTileBinningModeConfiguration(ControlList* cl,
@ -610,12 +599,11 @@ void clInsertTileBinningModeConfiguration(ControlList* cl,
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_TILE_BINNING_MODE_CONFIGURATION_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = tileAllocationMemoryAddress; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = tileAllocationMemorySize; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = tileStateDataArrayAddress; cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_TILE_BINNING_MODE_CONFIGURATION_opcode; cl->nextFreeByteOffset++;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = tileAllocationMemoryAddress; cl->nextFreeByteOffset += 4;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = tileAllocationMemorySize; cl->nextFreeByteOffset += 4;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = tileStateDataArrayAddress; cl->nextFreeByteOffset += 4;
uint32_t tileSizeW = 64;
uint32_t tileSizeH = 64;
@ -632,15 +620,15 @@ void clInsertTileBinningModeConfiguration(ControlList* cl,
uint32_t widthInTiles = divRoundUp(widthInPixels, tileSizeW);
uint32_t heightInTiles = divRoundUp(heightInPixels, tileSizeH);
*(uint8_t*)cl->nextFreeByte = widthInTiles; cl->nextFreeByte++;
*(uint8_t*)cl->nextFreeByte = heightInTiles; cl->nextFreeByte++;
*cl->nextFreeByte =
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = widthInTiles; cl->nextFreeByteOffset++;
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = heightInTiles; cl->nextFreeByteOffset++;
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) =
moveBits(multisampleMode4x, 1, 0) |
moveBits(tileBuffer64BitColorDepth, 1, 1) |
moveBits(autoInitializeTileStateDataArray, 1, 2) |
moveBits(tileAllocationInitialBlockSize, 2, 3) |
moveBits(tileAllocationBlockSize, 2, 5) |
moveBits(doubleBufferInNonMsMode, 1, 7); cl->nextFreeByte++;
moveBits(doubleBufferInNonMsMode, 1, 7); cl->nextFreeByteOffset++;
}
/*
@ -698,11 +686,10 @@ void clInsertGEMRelocations(ControlList* cl,
uint32_t buffer1)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_GEM_RELOCATIONS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = buffer0; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = buffer1; cl->nextFreeByte += 4;
assert(cl->CPA);
*(uint8_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = V3D21_GEM_RELOCATIONS_opcode; cl->nextFreeByteOffset++;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = buffer0; cl->nextFreeByteOffset += 4;
*(uint32_t*)getCPAptrFromOffset(cl->CPA, cl->nextFreeByteOffset) = buffer1; cl->nextFreeByteOffset += 4;
}
//input: 2 cls (cl, handles cl)
@ -729,34 +716,33 @@ void clInsertShaderRecord(ControlList* cls,
ControlListAddress coordinateCodeAddress)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte =
assert(cls->CPA);
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) =
moveBits(fragmentShaderIsSingleThreaded, 1, 0) |
moveBits(pointSizeIncludedInShadedVertexData, 1, 1) |
moveBits(enableClipping, 1, 2); cls->nextFreeByte++;
*cls->nextFreeByte = 0; cls->nextFreeByte++;
*(uint16_t*)cls->nextFreeByte = moveBits(fragmentNumberOfUsedUniforms, 16, 0); cls->nextFreeByte++;
*cls->nextFreeByte |= fragmentNumberOfVaryings; cls->nextFreeByte++;
clEmitShaderRelocation(relocCl, handlesCl, handlesBuf, handlesSize, &fragmentCodeAddress);
*(uint32_t*)cls->nextFreeByte = fragmentCodeAddress.offset; cls->nextFreeByte += 4;
*(uint32_t*)cls->nextFreeByte = fragmentUniformsAddress; cls->nextFreeByte += 4;
moveBits(enableClipping, 1, 2); cls->nextFreeByteOffset++;
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = 0; cls->nextFreeByteOffset++;
*(uint16_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = moveBits(fragmentNumberOfUsedUniforms, 16, 0); cls->nextFreeByteOffset++;
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) |= fragmentNumberOfVaryings; cls->nextFreeByteOffset++;
clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &fragmentCodeAddress);
*(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = fragmentCodeAddress.offset; cls->nextFreeByteOffset += 4;
*(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = fragmentUniformsAddress; cls->nextFreeByteOffset += 4;
*(uint16_t*)cls->nextFreeByte = moveBits(vertexNumberOfUsedUniforms, 16, 0); cls->nextFreeByte += 2;
*cls->nextFreeByte = vertexAttributeArraySelectBits; cls->nextFreeByte++;
*cls->nextFreeByte = vertexTotalAttributesSize; cls->nextFreeByte++;
clEmitShaderRelocation(relocCl, handlesCl, handlesBuf, handlesSize, &vertexCodeAddress);
*(uint16_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = moveBits(vertexNumberOfUsedUniforms, 16, 0); cls->nextFreeByteOffset += 2;
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = vertexAttributeArraySelectBits; cls->nextFreeByteOffset++;
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = vertexTotalAttributesSize; cls->nextFreeByteOffset++;
clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &vertexCodeAddress);
//wtf??? --> shader code will always have an offset of 0 so this is fine
uint32_t offset = moveBits(vertexCodeAddress.offset, 32, 0) | moveBits(vertexUniformsAddress, 32, 0);
*(uint32_t*)cls->nextFreeByte = offset; cls->nextFreeByte += 4;
cls->nextFreeByte += 4;
*(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = offset; cls->nextFreeByteOffset += 4;
cls->nextFreeByteOffset += 4;
*(uint16_t*)cls->nextFreeByte = moveBits(coordinateNumberOfUsedUniforms, 16, 0); cls->nextFreeByte += 2;
*cls->nextFreeByte = coordinateAttributeArraySelectBits; cls->nextFreeByte++;
*cls->nextFreeByte = coordinateTotalAttributesSize; cls->nextFreeByte++;
clEmitShaderRelocation(relocCl, handlesCl, handlesBuf, handlesSize, &coordinateCodeAddress);
*(uint32_t*)cls->nextFreeByte = coordinateCodeAddress.offset; cls->nextFreeByte += 4;
*(uint32_t*)cls->nextFreeByte = coordinateUniformsAddress; cls->nextFreeByte += 4;
*(uint16_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = moveBits(coordinateNumberOfUsedUniforms, 16, 0); cls->nextFreeByteOffset += 2;
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateAttributeArraySelectBits; cls->nextFreeByteOffset++;
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateTotalAttributesSize; cls->nextFreeByteOffset++;
clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &coordinateCodeAddress);
*(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateCodeAddress.offset; cls->nextFreeByteOffset += 4;
*(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateUniformsAddress; cls->nextFreeByteOffset += 4;
}
//input: 2 cls (cl, handles cl)
@ -771,27 +757,26 @@ void clInsertAttributeRecord(ControlList* cls,
uint32_t coordinateVPMOffset)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
assert(cls->CPA);
uint32_t sizeBytesMinusOne = sizeBytes - 1;
clEmitShaderRelocation(relocCl, handlesCl, handlesBuf, handlesSize, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
*cls->nextFreeByte = sizeBytesMinusOne; cls->nextFreeByte++;
*cls->nextFreeByte = stride; cls->nextFreeByte++;
*cls->nextFreeByte = vertexVPMOffset; cls->nextFreeByte++;
*cls->nextFreeByte = coordinateVPMOffset; cls->nextFreeByte++;
clEmitShaderRelocation(relocCl, handlesCl, handlesSize, &address);
*(uint32_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = address.offset; cls->nextFreeByteOffset += 4;
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = sizeBytesMinusOne; cls->nextFreeByteOffset++;
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = stride; cls->nextFreeByteOffset++;
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = vertexVPMOffset; cls->nextFreeByteOffset++;
*(uint8_t*)getCPAptrFromOffset(cls->CPA, cls->nextFreeByteOffset) = coordinateVPMOffset; cls->nextFreeByteOffset++;
}
uint32_t clGetHandleIndex(ControlList* handlesCl, uint8_t* handlesBuf, uint32_t handlesSize, uint32_t handle)
uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handlesSize, uint32_t handle)
{
uint32_t c = 0;
//if curr marker is closed already we need to work with the stored size
uint32_t numHandles = (handlesSize ? handlesSize : (handlesCl->nextFreeByte - handlesBuf)) / 4;
uint32_t numHandles = (handlesSize ? handlesSize : (handlesCl->nextFreeByteOffset - handlesCl->offset)) / 4;
for(; c < numHandles; ++c)
{
if(((uint32_t*)handlesBuf)[c] == handle)
if(((uint32_t*)getCPAptrFromOffset(handlesCl->CPA, handlesCl->offset))[c] == handle)
{
//found
return c;
@ -799,27 +784,25 @@ uint32_t clGetHandleIndex(ControlList* handlesCl, uint8_t* handlesBuf, uint32_t
}
//write handle to handles cl
*(uint32_t*)handlesCl->nextFreeByte = handle;
handlesCl->nextFreeByte += 4;
*(uint32_t*)getCPAptrFromOffset(handlesCl->CPA, handlesCl->nextFreeByteOffset) = handle;
handlesCl->nextFreeByteOffset += 4;
return c;
}
//Emits one shader relocation entry.
//Looks up address->handle in the handles control list via clGetHandleIndex
//(which appends the handle when it is not present yet) and writes the
//returned index as a 32 bit value at relocCl's current write position,
//then advances that position by 4 bytes.
//handlesSize is forwarded unchanged to clGetHandleIndex.
//No space check happens here; callers presumably reserve room (clFit) before calling -- TODO confirm
//input: 2 cls (cl + handles cl)
inline void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint8_t* handlesBuf, uint32_t handlesSize, const ControlListAddress* address)
inline void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint32_t handlesSize, const ControlListAddress* address)
{
assert(relocCl);
assert(relocCl->buffer);
assert(relocCl->nextFreeByte);
assert(relocCl->CPA);
assert(handlesCl);
assert(handlesCl->buffer);
assert(handlesCl->nextFreeByte);
assert(handlesCl->CPA);
assert(address);
assert(address->handle);
//store the handle's index within the handles list (not a byte offset) in the reloc cl
*(uint32_t*)relocCl->nextFreeByte = clGetHandleIndex(handlesCl, handlesBuf, handlesSize, address->handle);
relocCl->nextFreeByte += 4;
*(uint32_t*)getCPAptrFromOffset(relocCl->CPA, relocCl->nextFreeByteOffset) = clGetHandleIndex(handlesCl, handlesSize, address->handle);
relocCl->nextFreeByteOffset += 4;
}
inline void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address)

View File

@ -15,7 +15,8 @@ typedef struct ControlListAddress
typedef struct CLMarker
{
//current binning cl buf position is this struct in the CL plus sizeof(this struct)
struct CLMarker* nextMarker; //TODO change to offset, could be reallocated
//struct CLMarker* nextMarker; //
uint32_t nextMarkerOffset;
uint32_t size; //in bytes
void* writeImage; //_image* to render to
void* readImage;
@ -41,25 +42,32 @@ typedef struct CLMarker
//pointers that point to where all the other CL data is
//plus sizes
uint8_t* handlesBuf; //TODO change to offset, could be reallocated
//uint8_t* handlesBuf; //
uint32_t handlesBufOffset;
uint32_t handlesSize;
uint8_t* shaderRecBuf; //TODO change to offset, could be reallocated
//uint8_t* shaderRecBuf; //
uint32_t shaderRecBufOffset;
uint32_t shaderRecSize;
uint32_t shaderRecCount;
uint8_t* uniformsBuf; //TODO change to offset, could be reallocated
//uint8_t* uniformsBuf; //
uint32_t uniformsBufOffset;
uint32_t uniformsSize;
} CLMarker;
//Bookkeeping for one control list whose storage lives inside a consecutive
//pool allocator (CPA) buffer. Positions are kept as byte offsets into that
//buffer and turned into pointers with getCPAptrFromOffset(CPA, offset) at the
//point of use, because the underlying memory could be reallocated.
typedef struct ControlList
{
uint8_t* buffer;
void* CPA; //allocator owning the backing buffer (callers pass &cp->cpa to clInit)
//uint8_t* buffer;
uint32_t offset; //offset into CPA buf
uint32_t numBlocks; //number of CPA blocks held; this is the count handed to consecutivePoolFree
uint32_t blockSize; //block size in bytes; callers pass the CPA's blockSize to clInit
uint8_t* nextFreeByte; //pointer to the next available free byte
CLMarker* currMarker; //TODO change to offset, could be reallocated
//uint8_t* nextFreeByte; //pointer to the next available free byte
uint32_t nextFreeByteOffset; //offset (not a pointer) of the next available free byte, in the same offset space as 'offset'
//CLMarker* currMarker;
uint32_t currMarkerOffset; //offset of the current CLMarker; -1 is treated as "no marker recorded yet"
} ControlList;
void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint8_t* handlesBuf, uint32_t handlesSize, const ControlListAddress* address);
void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, uint32_t handlesSize, const ControlListAddress* address);
void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address);
#define __gen_user_data struct ControlList
@ -74,7 +82,7 @@ void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address);
uint32_t divRoundUp(uint32_t n, uint32_t d);
uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset);
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size);
void clInit(ControlList* cl, void* buffer, uint32_t blockSize);
void clInit(ControlList* cl, void* CPA, uint32_t offset, uint32_t blockSize);
void clInsertNewCLMarker(ControlList* cl,
ControlList* handlesCL,
ControlList* shaderRecCL,
@ -206,7 +214,7 @@ void clInsertAttributeRecord(ControlList* cls,
uint32_t stride,
uint32_t vertexVPMOffset,
uint32_t coordinateVPMOffset);
uint32_t clGetHandleIndex(ControlList* handlesCl, uint8_t* handlesBuf, uint32_t handlesSize, uint32_t handle);
uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handlesSize, uint32_t handle);
#if defined (__cplusplus)
}

View File

@ -121,10 +121,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkAllocateCommandBuffers(
pCommandBuffers[c]->usageFlags = 0;
pCommandBuffers[c]->state = CMDBUF_STATE_INITIAL;
pCommandBuffers[c]->cp = cp;
clInit(&pCommandBuffers[c]->binCl, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize);
clInit(&pCommandBuffers[c]->handlesCl, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize);
clInit(&pCommandBuffers[c]->shaderRecCl, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize);
clInit(&pCommandBuffers[c]->uniformsCl, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize);
clInit(&pCommandBuffers[c]->binCl, &cp->cpa, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize);
clInit(&pCommandBuffers[c]->handlesCl, &cp->cpa, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize);
clInit(&pCommandBuffers[c]->shaderRecCl, &cp->cpa, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize);
clInit(&pCommandBuffers[c]->uniformsCl, &cp->cpa, consecutivePoolAllocate(&cp->cpa, 1), cp->cpa.blockSize);
pCommandBuffers[c]->graphicsPipeline = 0;
pCommandBuffers[c]->computePipeline = 0;
@ -151,25 +151,25 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkAllocateCommandBuffers(
pCommandBuffers[c]->perfmonID = 0;
if(!pCommandBuffers[c]->binCl.buffer)
if(pCommandBuffers[c]->binCl.offset == -1)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->handlesCl.buffer)
if(pCommandBuffers[c]->handlesCl.offset == -1)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->shaderRecCl.buffer)
if(pCommandBuffers[c]->shaderRecCl.offset == -1)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->uniformsCl.buffer)
if(pCommandBuffers[c]->uniformsCl.offset == -1)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
@ -183,10 +183,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkAllocateCommandBuffers(
{
for(int c = 0; c < pAllocateInfo->commandBufferCount; ++c)
{
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->binCl.buffer, pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->handlesCl.buffer, pCommandBuffers[c]->handlesCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->shaderRecCl.buffer, pCommandBuffers[c]->shaderRecCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->uniformsCl.buffer, pCommandBuffers[c]->uniformsCl.numBlocks);
consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->binCl.offset), pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->handlesCl.offset), pCommandBuffers[c]->handlesCl.numBlocks);
consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->shaderRecCl.offset), pCommandBuffers[c]->shaderRecCl.numBlocks);
consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->uniformsCl.offset), pCommandBuffers[c]->uniformsCl.numBlocks);
poolFree(&cp->pa, pCommandBuffers[c]);
pCommandBuffers[c] = 0;
}
@ -290,14 +290,14 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit(
{
VkCommandBuffer cmdbuf = pSubmits->pCommandBuffers[c];
if(!cmdbuf->binCl.currMarker)
if(cmdbuf->binCl.currMarkerOffset == -1)
{
//no markers recorded yet, skip
continue;
}
//first entry is assumed to be a marker
CLMarker* marker = cmdbuf->binCl.buffer;
CLMarker* marker = getCPAptrFromOffset(cmdbuf->binCl.CPA, cmdbuf->binCl.offset);
//a command buffer may contain multiple render passes
//and commands outside render passes such as clear commands
@ -327,12 +327,12 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit(
uint32_t readMSAAdepthStencilImage = marker->readMSAAdepthStencilImage;
//This should not result in an insertion!
uint32_t writeImageIdx = writeImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeImage->boundMem->bo) : 0;
uint32_t readImageIdx = readImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, readImage->boundMem->bo) : 0;
uint32_t writeDepthStencilImageIdx = writeDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeDepthStencilImage->boundMem->bo) : 0;
uint32_t readDepthStencilImageIdx = readDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, readDepthStencilImage->boundMem->bo) : 0;
uint32_t writeMSAAimageIdx = writeMSAAimage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeMSAAimage->boundMem->bo) : 0;
uint32_t writeMSAAdepthStencilImageIdx = writeMSAAdepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo) : 0;
uint32_t writeImageIdx = writeImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeImage->boundMem->bo) : 0;
uint32_t readImageIdx = readImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, readImage->boundMem->bo) : 0;
uint32_t writeDepthStencilImageIdx = writeDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeDepthStencilImage->boundMem->bo) : 0;
uint32_t readDepthStencilImageIdx = readDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, readDepthStencilImage->boundMem->bo) : 0;
uint32_t writeMSAAimageIdx = writeMSAAimage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeMSAAimage->boundMem->bo) : 0;
uint32_t writeMSAAdepthStencilImageIdx = writeMSAAdepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo) : 0;
// fprintf(stderr, "writeImage: %u\n", writeImage);
// fprintf(stderr, "readImage: %u\n", readImage);
@ -510,10 +510,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit(
submitCl.height = height;
submitCl.flags |= marker->flags;
submitCl.bo_handles = marker->handlesBuf;
submitCl.bo_handles = getCPAptrFromOffset(cmdbuf->handlesCl.CPA, marker->handlesBufOffset);
submitCl.bin_cl = ((uint8_t*)marker) + sizeof(CLMarker);
submitCl.shader_rec = marker->shaderRecBuf;
submitCl.uniforms = marker->uniformsBuf;
submitCl.shader_rec = getCPAptrFromOffset(cmdbuf->shaderRecCl.CPA, marker->shaderRecBufOffset);
submitCl.uniforms = getCPAptrFromOffset(cmdbuf->uniformsCl.CPA, marker->uniformsBufOffset);
if(marker->perfmonID)
{
@ -653,7 +653,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit(
}
//advance in linked list
marker = marker->nextMarker;
marker = marker->nextMarkerOffset == -1 ? 0 : getCPAptrFromOffset(cmdbuf->binCl.CPA, marker->nextMarkerOffset);
}
}
@ -706,10 +706,10 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkFreeCommandBuffers(
{
if(pCommandBuffers[c])
{
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->binCl.buffer, pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->handlesCl.buffer, pCommandBuffers[c]->handlesCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->shaderRecCl.buffer, pCommandBuffers[c]->shaderRecCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->uniformsCl.buffer, pCommandBuffers[c]->uniformsCl.numBlocks);
consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->binCl.offset), pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->handlesCl.offset), pCommandBuffers[c]->handlesCl.numBlocks);
consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->shaderRecCl.offset), pCommandBuffers[c]->shaderRecCl.numBlocks);
consecutivePoolFree(&cp->cpa, getCPAptrFromOffset(&cp->cpa, pCommandBuffers[c]->uniformsCl.offset), pCommandBuffers[c]->uniformsCl.numBlocks);
poolFree(&cp->pa, pCommandBuffers[c]);
}
}
@ -830,10 +830,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkResetCommandBuffer(
//reset commandbuffer state
commandBuffer->shaderRecCount = 0;
clInit(&commandBuffer->binCl, commandBuffer->binCl.buffer, commandBuffer->cp->cpa.blockSize);
clInit(&commandBuffer->handlesCl, commandBuffer->handlesCl.buffer, commandBuffer->cp->cpa.blockSize);
clInit(&commandBuffer->shaderRecCl, commandBuffer->shaderRecCl.buffer, commandBuffer->cp->cpa.blockSize);
clInit(&commandBuffer->uniformsCl, commandBuffer->uniformsCl.buffer, commandBuffer->cp->cpa.blockSize);
clInit(&commandBuffer->binCl, &commandBuffer->cp->cpa, commandBuffer->binCl.offset, commandBuffer->cp->cpa.blockSize);
clInit(&commandBuffer->handlesCl, &commandBuffer->cp->cpa, commandBuffer->handlesCl.offset, commandBuffer->cp->cpa.blockSize);
clInit(&commandBuffer->shaderRecCl, &commandBuffer->cp->cpa, commandBuffer->shaderRecCl.offset, commandBuffer->cp->cpa.blockSize);
clInit(&commandBuffer->uniformsCl, &commandBuffer->cp->cpa, commandBuffer->uniformsCl.offset, commandBuffer->cp->cpa.blockSize);
commandBuffer->graphicsPipeline = 0;
commandBuffer->computePipeline = 0;

View File

@ -533,12 +533,12 @@ void clFit(VkCommandBuffer cb, ControlList* cl, uint32_t commandSize)
{
if(!clHasEnoughSpace(cl, commandSize))
{
uint32_t currSize = cl->nextFreeByte - cl->buffer;
uint32_t currMarkerOffset = (uint8_t*)cl->currMarker - cl->buffer;
cl->buffer = consecutivePoolReAllocate(&cb->cp->cpa, cl->buffer, cl->numBlocks); assert(cl->buffer);
cl->nextFreeByte = cl->buffer + currSize;
uint32_t currSize = cl->nextFreeByteOffset - cl->offset;
uint32_t currMarkerOffset = cl->currMarkerOffset - cl->offset;
cl->offset = consecutivePoolReAllocate(&cb->cp->cpa, getCPAptrFromOffset(cl->CPA, cl->offset), cl->numBlocks); assert(cl->offset != -1);
cl->nextFreeByteOffset = cl->offset + currSize;
cl->numBlocks++;
cl->currMarker = cl->buffer + currMarkerOffset;
cl->currMarkerOffset = cl->offset + currMarkerOffset;
}
}

View File

@ -162,20 +162,20 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkAllocateDescriptorSets(
if(imageDescriptorCount > 0)
{
ds->imageDescriptors = consecutivePoolAllocate(&dp->imageDescriptorCPA, imageDescriptorCount);
ds->imageBindingMap = createMap(consecutivePoolAllocate(&dp->mapElementCPA, imageDescriptorCount), imageDescriptorCount);
ds->imageDescriptors = getCPAptrFromOffset(&dp->imageDescriptorCPA, consecutivePoolAllocate(&dp->imageDescriptorCPA, imageDescriptorCount));
ds->imageBindingMap = createMap(getCPAptrFromOffset(&dp->mapElementCPA, consecutivePoolAllocate(&dp->mapElementCPA, imageDescriptorCount)), imageDescriptorCount);
}
if(bufferDescriptorCount > 0)
{
ds->bufferDescriptors = consecutivePoolAllocate(&dp->bufferDescriptorCPA, bufferDescriptorCount);
ds->bufferBindingMap = createMap(consecutivePoolAllocate(&dp->mapElementCPA, bufferDescriptorCount), bufferDescriptorCount);
ds->bufferDescriptors = getCPAptrFromOffset(&dp->bufferDescriptorCPA, consecutivePoolAllocate(&dp->bufferDescriptorCPA, bufferDescriptorCount));
ds->bufferBindingMap = createMap(getCPAptrFromOffset(&dp->mapElementCPA, consecutivePoolAllocate(&dp->mapElementCPA, bufferDescriptorCount)), bufferDescriptorCount);
}
if(texelBufferDescriptorCount > 0)
{
ds->texelBufferDescriptors = consecutivePoolAllocate(&dp->texelBufferDescriptorCPA, texelBufferDescriptorCount);
ds->texelBufferBindingMap = createMap(consecutivePoolAllocate(&dp->mapElementCPA, texelBufferDescriptorCount), texelBufferDescriptorCount);
ds->texelBufferDescriptors = getCPAptrFromOffset(&dp->texelBufferDescriptorCPA, consecutivePoolAllocate(&dp->texelBufferDescriptorCPA, texelBufferDescriptorCount));
ds->texelBufferBindingMap = createMap(getCPAptrFromOffset(&dp->mapElementCPA, consecutivePoolAllocate(&dp->mapElementCPA, texelBufferDescriptorCount)), texelBufferDescriptorCount);
}
//TODO immutable samplers

View File

@ -8,6 +8,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
assert(commandBuffer);
_commandBuffer* cb = commandBuffer;
CLMarker* currMarker = getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset);
//TODO handle cases when submitting >65k vertices in a VBO
//TODO HW-2116 workaround
@ -224,8 +225,8 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
clInsertShaderRecord(&commandBuffer->shaderRecCl,
&relocCl,
&commandBuffer->handlesCl,
cb->binCl.currMarker->handlesBuf,
cb->binCl.currMarker->handlesSize,
getCPAptrFromOffset(cb->handlesCl.CPA, currMarker->handlesBufOffset),
currMarker->handlesSize,
!fragModule->hasThreadSwitch,
0, //TODO point size included in shaded vertex data?
1, //enable clipping
@ -310,8 +311,8 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
clInsertAttributeRecord(&commandBuffer->shaderRecCl,
&relocCl,
&commandBuffer->handlesCl,
cb->binCl.currMarker->handlesBuf,
cb->binCl.currMarker->handlesSize,
getCPAptrFromOffset(cb->handlesCl.CPA, currMarker->handlesBufOffset),
currMarker->handlesSize,
vertexBuffer, //reloc address
formatByteSize,
stride,
@ -349,7 +350,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
//emit reloc for texture BO
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, di->imageView->image->boundMem->bo);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, di->imageView->image->boundMem->bo);
//emit tex bo reloc index
clFit(commandBuffer, &commandBuffer->uniformsCl, 4);
@ -368,7 +369,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
//emit reloc for BO
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, db->buffer->boundMem->bo);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, db->buffer->boundMem->bo);
//emit bo reloc index
clFit(commandBuffer, &commandBuffer->uniformsCl, 4);
@ -385,7 +386,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
//emit reloc for BO
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, dtb->bufferView->buffer->boundMem->bo);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, dtb->bufferView->buffer->boundMem->bo);
//emit bo reloc index
clFit(commandBuffer, &commandBuffer->uniformsCl, 4);
@ -588,9 +589,10 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdDrawIndexed(
uint32_t maxIndex = drawCommon(commandBuffer, vertexOffset);
_commandBuffer* cb = commandBuffer;
CLMarker* currMarker = getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset);
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, cb->indexBuffer->boundMem->bo);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, cb->indexBuffer->boundMem->bo);
clInsertGEMRelocations(&commandBuffer->binCl, idx, 0);

View File

@ -127,21 +127,22 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB
clFit(commandBuffer, &commandBuffer->binCl, sizeof(CLMarker));
clInsertNewCLMarker(&commandBuffer->binCl, &cb->handlesCl, &cb->shaderRecCl, cb->shaderRecCount, &cb->uniformsCl);
commandBuffer->binCl.currMarker->writeImage = writeImage;
commandBuffer->binCl.currMarker->writeImageOffset = writeImageOffset;
commandBuffer->binCl.currMarker->readImage = readImage;
commandBuffer->binCl.currMarker->readImageOffset = readImageOffset;
commandBuffer->binCl.currMarker->writeDepthStencilImage = writeDepthStencilImage;
commandBuffer->binCl.currMarker->writeDepthStencilImageOffset = writeDepthStencilImageOffset;
commandBuffer->binCl.currMarker->readDepthStencilImage = readDepthStencilImage;
commandBuffer->binCl.currMarker->readDepthStencilImageOffset = readDepthStencilImageOffset;
commandBuffer->binCl.currMarker->writeMSAAimage = writeMSAAimage;
commandBuffer->binCl.currMarker->writeMSAAimageOffset = writeMSAAimageOffset;
commandBuffer->binCl.currMarker->writeMSAAdepthStencilImage = writeMSAAdepthStencilImage;
commandBuffer->binCl.currMarker->writeMSAAdepthStencilImageOffset = writeMSAAdepthStencilImageOffset;
commandBuffer->binCl.currMarker->performResolve = performResolve;
commandBuffer->binCl.currMarker->readMSAAimage = readMSAAimage;
commandBuffer->binCl.currMarker->readMSAAdepthStencilImage = readMSAAdepthStencilImage;
CLMarker* currMarker = getCPAptrFromOffset(cb->binCl.CPA, cb->binCl.currMarkerOffset);
currMarker->writeImage = writeImage;
currMarker->writeImageOffset = writeImageOffset;
currMarker->readImage = readImage;
currMarker->readImageOffset = readImageOffset;
currMarker->writeDepthStencilImage = writeDepthStencilImage;
currMarker->writeDepthStencilImageOffset = writeDepthStencilImageOffset;
currMarker->readDepthStencilImage = readDepthStencilImage;
currMarker->readDepthStencilImageOffset = readDepthStencilImageOffset;
currMarker->writeMSAAimage = writeMSAAimage;
currMarker->writeMSAAimageOffset = writeMSAAimageOffset;
currMarker->writeMSAAdepthStencilImage = writeMSAAdepthStencilImage;
currMarker->writeMSAAdepthStencilImageOffset = writeMSAAdepthStencilImageOffset;
currMarker->performResolve = performResolve;
currMarker->readMSAAimage = readMSAAimage;
currMarker->readMSAAdepthStencilImage = readMSAAdepthStencilImage;
if(rp->subpasses[0].colorAttachmentCount > 0)
{
@ -153,14 +154,14 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB
if(!rp->subpasses[0].pResolveAttachments)
{
cb->binCl.currMarker->clearColor[0] =
cb->binCl.currMarker->clearColor[1] =
currMarker->clearColor[0] =
currMarker->clearColor[1] =
packVec4IntoABGR8(pRenderPassBegin->pClearValues[rp->subpasses[0].pColorAttachments[0].attachment].color.float32);
}
else
{
cb->binCl.currMarker->clearColor[0] =
cb->binCl.currMarker->clearColor[1] =
currMarker->clearColor[0] =
currMarker->clearColor[1] =
packVec4IntoABGR8(pRenderPassBegin->pClearValues[rp->subpasses[0].pColorAttachments[0].attachment].color.float32);
}
}
@ -173,7 +174,7 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB
{
flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
cb->binCl.currMarker->clearDepth =
currMarker->clearDepth =
(uint32_t)(pRenderPassBegin->pClearValues[rp->subpasses[0].pDepthStencilAttachment->attachment].depthStencil.depth * 0xffffff) & 0xffffff;
}
@ -181,55 +182,55 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB
{
flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
cb->binCl.currMarker->clearStencil =
currMarker->clearStencil =
pRenderPassBegin->pClearValues[rp->subpasses[0].pDepthStencilAttachment->attachment].depthStencil.stencil & 0xff;
}
}
cb->binCl.currMarker->flags = flags;
currMarker->flags = flags;
//insert relocs
if(writeImage)
{
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeImage->boundMem->bo);
clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeImage->boundMem->bo);
}
if(readImage)
{
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, readImage->boundMem->bo);
clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, readImage->boundMem->bo);
}
if(writeDepthStencilImage)
{
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeDepthStencilImage->boundMem->bo);
clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeDepthStencilImage->boundMem->bo);
}
if(readDepthStencilImage)
{
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, readDepthStencilImage->boundMem->bo);
clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, readDepthStencilImage->boundMem->bo);
}
if(writeMSAAimage)
{
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeMSAAimage->boundMem->bo);
clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeMSAAimage->boundMem->bo);
}
if(writeMSAAdepthStencilImage)
{
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo);
clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo);
}
uint32_t bpp = 0;
cb->binCl.currMarker->width = fb->width;
cb->binCl.currMarker->height = fb->height;
currMarker->width = fb->width;
currMarker->height = fb->height;
if(writeImage)
{
@ -247,11 +248,11 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB
}
//pad render size if we are rendering to a mip level
cb->binCl.currMarker->renderToMip = biggestMip > 0;
currMarker->renderToMip = biggestMip > 0;
uint32_t width = cb->binCl.currMarker->width;
uint32_t width = currMarker->width;
if(cb->binCl.currMarker->renderToMip)
if(currMarker->renderToMip)
{
width = getPow2Pad(width);
width = width < 4 ? 4 : width;
@ -265,7 +266,7 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB
0, //auto initialize tile state data array
bpp == 64, //64 bit color mode
writeMSAAimage || writeMSAAdepthStencilImage || performResolve ? 1 : 0, //msaa
width, cb->binCl.currMarker->height,
width, currMarker->height,
0, //tile state data array address
0, //tile allocation memory size
0); //tile allocation memory address
@ -277,7 +278,7 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB
clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length);
clInsertStartTileBinning(&commandBuffer->binCl);
cb->binCl.currMarker->perfmonID = cb->perfmonID;
currMarker->perfmonID = cb->perfmonID;
cb->currRenderPass = rp;
}

View File

@ -512,11 +512,13 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdClearColorImage(
{ //Simplest case: just submit a job to clear the image
clFit(commandBuffer, &commandBuffer->binCl, sizeof(CLMarker));
clInsertNewCLMarker(&commandBuffer->binCl, &commandBuffer->handlesCl, &commandBuffer->shaderRecCl, commandBuffer->shaderRecCount, &commandBuffer->uniformsCl);
commandBuffer->binCl.currMarker->writeImage = i;
CLMarker* currMarker = getCPAptrFromOffset(commandBuffer->binCl.CPA, commandBuffer->binCl.currMarkerOffset);
currMarker->writeImage = i;
//insert reloc for render target
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, i->boundMem->bo);
clGetHandleIndex(&commandBuffer->handlesCl, currMarker->handlesSize, i->boundMem->bo);
clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length);
clInsertTileBinningModeConfiguration(&commandBuffer->binCl,
@ -548,11 +550,11 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdClearColorImage(
clFit(commandBuffer, &commandBuffer->binCl, V3D21_FLUSH_length);
clInsertFlush(&commandBuffer->binCl);
commandBuffer->binCl.currMarker->clearColor[0] = commandBuffer->binCl.currMarker->clearColor[1] = packVec4IntoABGR8(pColor->float32);
commandBuffer->binCl.currMarker->flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
currMarker->clearColor[0] = currMarker->clearColor[1] = packVec4IntoABGR8(pColor->float32);
currMarker->flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
commandBuffer->binCl.currMarker->width = i->width;
commandBuffer->binCl.currMarker->height = i->height;
currMarker->width = i->width;
currMarker->height = i->height;
}
}

View File

@ -18,37 +18,37 @@ void simpleTest()
ConsecutivePoolAllocator cpa = createConsecutivePoolAllocator((char*)malloc(size), blocksize, size);
CPAdebugPrint(&cpa);
void* mem1 = consecutivePoolAllocate(&cpa, 1);
uint32_t mem1 = consecutivePoolAllocate(&cpa, 1);
CPAdebugPrint(&cpa);
void* mem2 = consecutivePoolAllocate(&cpa, 2);
uint32_t mem2 = consecutivePoolAllocate(&cpa, 2);
CPAdebugPrint(&cpa);
void* mem3 = consecutivePoolAllocate(&cpa, 3);
uint32_t mem3 = consecutivePoolAllocate(&cpa, 3);
CPAdebugPrint(&cpa);
void* mem11 = consecutivePoolAllocate(&cpa, 1);
uint32_t mem11 = consecutivePoolAllocate(&cpa, 1);
CPAdebugPrint(&cpa);
void* mem111 = consecutivePoolAllocate(&cpa, 1);
uint32_t mem111 = consecutivePoolAllocate(&cpa, 1);
CPAdebugPrint(&cpa);
void* mem0 = consecutivePoolAllocate(&cpa, 1);
uint32_t mem0 = consecutivePoolAllocate(&cpa, 1);
fprintf(stderr, "\n%p\n", mem0);
consecutivePoolFree(&cpa, mem11, 1);
consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem11), 1);
CPAdebugPrint(&cpa);
consecutivePoolFree(&cpa, mem111, 1);
consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem111), 1);
CPAdebugPrint(&cpa);
consecutivePoolFree(&cpa, mem2, 2);
consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem2), 2);
CPAdebugPrint(&cpa);
consecutivePoolFree(&cpa, mem3, 3);
consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem3), 3);
CPAdebugPrint(&cpa);
consecutivePoolFree(&cpa, mem1, 1);
consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem1), 1);
CPAdebugPrint(&cpa);
}
@ -61,7 +61,7 @@ void allocTest(uint32_t numToAlloc)
ConsecutivePoolAllocator cpa = createConsecutivePoolAllocator((char*)malloc(size), blocksize, size);
//CPAdebugPrint(&cpa);
void* mem1 = consecutivePoolAllocate(&cpa, numToAlloc);
uint32_t mem1 = consecutivePoolAllocate(&cpa, numToAlloc);
CPAdebugPrint(&cpa);
fprintf(stderr, "\nmem %p\n", mem1);
@ -76,25 +76,52 @@ void freeOneTest(uint32_t which)
ConsecutivePoolAllocator cpa = createConsecutivePoolAllocator((char*)malloc(size), blocksize, size);
//CPAdebugPrint(&cpa);
void* mem[8];
uint32_t mem[8];
for(uint32_t c = 0; c < 8; ++c)
{
mem[c] = consecutivePoolAllocate(&cpa, 1);
}
consecutivePoolFree(&cpa, mem[which], 1);
consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem[which]), 1);
CPAdebugPrint(&cpa);
//fprintf(stderr, "\nmem %p\n", mem);
}
int main() {
//simpleTest();
//Exercises consecutivePoolReAllocate on a tiny pool (3 blocks of 16 bytes).
//Sequence: allocate two single blocks, free the first one, reallocate the
//second one (passing a current size of 1 block), then allocate one more
//block. The allocator state is dumped with CPAdebugPrint after each step so
//the free list can be inspected by eye; nothing is asserted.
//All allocations are uint32_t offsets into the pool buffer and are converted
//with getCPAptrFromOffset wherever the allocator API expects a pointer.
void reallocTest()
{
	uint32_t blocksize = 16;
	uint32_t numblocks = 3;
	uint32_t size = numblocks * blocksize;
	//NOTE(review): these allocTest calls look like leftovers from the previous
	//main() body (they also appear commented out in main) -- confirm whether they belong here
	allocTest(1);
	allocTest(3);
	allocTest(8);
	allocTest(9);
	ConsecutivePoolAllocator cpa = createConsecutivePoolAllocator((char*)malloc(size), blocksize, size);
	CPAdebugPrint(&cpa);
	uint32_t mem1 = consecutivePoolAllocate(&cpa, 1);
	CPAdebugPrint(&cpa);
	uint32_t mem2 = consecutivePoolAllocate(&cpa, 1);
	CPAdebugPrint(&cpa);
	consecutivePoolFree(&cpa, getCPAptrFromOffset(&cpa, mem1), 1);
	CPAdebugPrint(&cpa);
	//the returned offset replaces mem2: the allocation may have moved inside the pool
	mem2 = consecutivePoolReAllocate(&cpa, getCPAptrFromOffset(&cpa, mem2), 1);
	CPAdebugPrint(&cpa);
	uint32_t mem0 = consecutivePoolAllocate(&cpa, 1);
	//mem0 is an integer offset, not a pointer: passing it to %p is a format/argument
	//mismatch (undefined behaviour), so print it as an unsigned integer instead
	fprintf(stderr, "\n%u\n", (unsigned)mem0);
}
int main() {
// simpleTest();
reallocTest();
// allocTest(1);
// allocTest(3);
// allocTest(8);
// allocTest(9);
return 0;
}