mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2024-12-01 13:24:20 +01:00

split driver.c into multiple files

Unknown 2018-08-26 14:11:43 +01:00
parent 0c3e0f798b
commit 2dee7f9439
19 changed files with 3069 additions and 2888 deletions

31 driver/AlignedAllocator.c Normal file

@@ -0,0 +1,31 @@
#include "AlignedAllocator.h"
void* alignedAlloc( unsigned bytes, unsigned alignment )
{
if( !bytes )
{
return 0;
}
const unsigned maxBytes = 1024 * 1024 * 1024; //1GB is max on RPi
if( bytes > maxBytes )
{
return 0; //bad alloc
}
void* pv = 0;
if( posix_memalign( &pv, alignment, bytes ) )
{
pv = 0; //allocation failed
}
return pv;
}
void alignedFree( void* p )
{
free( p );
}
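For orientation, a minimal usage sketch of the helper above; the 4096-byte size and alignment are illustrative values only, not anything the driver prescribes:

#include "AlignedAllocator.h"
#include <stdio.h>

int main(void)
{
	//request one page-aligned buffer; alignedAlloc returns 0 on failure or for a zero-byte request
	void* buf = alignedAlloc( 4096, 4096 );
	if( !buf )
	{
		fprintf( stderr, "aligned allocation failed\n" );
		return 1;
	}
	//... use buf ...
	alignedFree( buf );
	return 0;
}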

driver/AlignedAllocator.h

@@ -6,34 +6,8 @@ extern "C" {
#include <stdlib.h>
void* alignedAlloc( unsigned bytes, unsigned alignment )
{
if( !bytes )
{
return 0;
}
const unsigned maxBytes = 1024 * 1024 * 1024; //1GB is max on RPi
if( bytes > maxBytes )
{
return 0; //bad alloc
}
void* pv = 0;
if( posix_memalign( &pv, alignment, bytes ) )
{
pv = 0; //allocation failed
}
return pv;
}
void alignedFree( void* p )
{
free( p );
}
void* alignedAlloc( unsigned bytes, unsigned alignment );
void alignedFree( void* p );
#if defined (__cplusplus)
}

157 driver/ConsecutivePoolAllocator.c Normal file

@@ -0,0 +1,157 @@
#include "ConsecutivePoolAllocator.h"
#include "CustomAssert.h"
#include <stdint.h>
ConsecutivePoolAllocator createConsecutivePoolAllocator(char* b, unsigned bs, unsigned s)
{
assert(b); //only allocated memory
assert(bs >= sizeof(void*)); //we need to be able to store
assert(s%bs==0); //we want a size that is the exact multiple of block size
assert(s > bs); //at least 1 element
ConsecutivePoolAllocator pa =
{
.buf = b,
.nextFreeBlock = (uint32_t*)b,
.blockSize = bs,
.size = s
};
//initialize linked list of free pointers
uint32_t* ptr = pa.nextFreeBlock;
unsigned last = s/bs - 1;
for(unsigned c = 0; c < last; ++c)
{
*ptr = (uint32_t)ptr + bs;
ptr += bs/4;
}
*ptr = 0; //last element
return pa;
}
void destroyConsecutivePoolAllocator(ConsecutivePoolAllocator* pa)
{
//actual memory freeing is done by caller
pa->buf = 0;
pa->nextFreeBlock = 0;
pa->blockSize = 0;
pa->size = 0;
}
//allocate numBlocks consecutive memory
void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks)
{
assert(pa->buf);
if(!pa->nextFreeBlock)
{
return 0; //no free blocks
}
void* ret = 0;
for(uint32_t* candidate = pa->nextFreeBlock; candidate; candidate = (uint32_t*)*candidate)
{
uint32_t found = 1;
uint32_t* prevBlock = candidate;
uint32_t* blockAfterCandidate = (uint32_t*)*candidate;
//check if there are enough consecutive free blocks
for(uint32_t c = 0; c < numBlocks - 1; ++c)
{
if(blockAfterCandidate - prevBlock != pa->blockSize)
{
//signal if not consecutive (ie. diff is greater than blocksize)
found = 0;
break;
}
prevBlock = blockAfterCandidate;
blockAfterCandidate = (uint32_t*)*blockAfterCandidate;
}
//numblocks consecutive blocks found
if(found)
{
ret = candidate;
if(pa->nextFreeBlock == candidate)
{
//candidate found immediately
pa->nextFreeBlock = blockAfterCandidate;
}
else
{
//somewhere the linked list would point to candidate, we need to correct this
for(uint32_t* nextFreeBlockCandidate = pa->nextFreeBlock; nextFreeBlockCandidate; nextFreeBlockCandidate = (uint32_t*)*nextFreeBlockCandidate)
{
if((uint32_t*)*nextFreeBlockCandidate == candidate)
{
*nextFreeBlockCandidate = (uint32_t)blockAfterCandidate;
break;
}
}
}
break;
}
}
return ret;
}
//free numBlocks consecutive memory
void consecutivePoolFree(ConsecutivePoolAllocator* pa, void* p, uint32_t numBlocks)
{
assert(pa->buf);
assert(p);
if((void*)pa->nextFreeBlock > p)
{
for(uint32_t c = 0; c < numBlocks - 1; ++c)
{
//set each allocated block to form a linked list
*(uint32_t*)((char*)p + c * pa->blockSize) = (uint32_t)((char*)p + (c + 1) * pa->blockSize);
}
//set last block to point to the next free
*(uint32_t*)((char*)p + (numBlocks - 1) * pa->blockSize) = (uint32_t)pa->nextFreeBlock;
//set next free to the newly freed block
pa->nextFreeBlock = p;
return;
}
//somewhere the linked list may point after the free block (or null), we need to correct this
for(uint32_t* nextFreeBlockCandidate = pa->nextFreeBlock; nextFreeBlockCandidate; nextFreeBlockCandidate = (uint32_t*)*nextFreeBlockCandidate)
{
if((void*)*nextFreeBlockCandidate > p || !*nextFreeBlockCandidate)
{
for(uint32_t c = 0; c < numBlocks - 1; ++c)
{
//set each allocated block to form a linked list
*(uint32_t*)((char*)p + c * pa->blockSize) = (uint32_t)((char*)p + (c + 1) * pa->blockSize);
}
//set last block to point to the next free
*(uint32_t*)((char*)p + (numBlocks - 1) * pa->blockSize) = *nextFreeBlockCandidate;
*nextFreeBlockCandidate = (uint32_t)p;
break;
}
}
}
//if there's a block free after the current block, it just allocates one more block
//else it frees current block and allocates a new one
void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks)
{
if(pa->nextFreeBlock == (uint32_t*)((char*)currentMem + currNumBlocks * pa->blockSize))
{
//we have one more block after current one, so just expand current
pa->nextFreeBlock = (uint32_t*)*pa->nextFreeBlock;
return currentMem;
}
else
{
void* ret = consecutivePoolAllocate(pa, currNumBlocks + 1);
consecutivePoolFree(pa, currentMem, currNumBlocks);
return ret;
}
}
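To show the intended call pattern, a hedged sketch of driving this allocator from caller code; the block size, block count and malloc'd backing buffer are example choices only:

#include "ConsecutivePoolAllocator.h"
#include <stdlib.h>

void consecutivePoolExample(void)
{
	const unsigned blockSize = 16, numBlocks = 64;
	char* backing = malloc( blockSize * numBlocks ); //the allocator never owns this memory
	ConsecutivePoolAllocator cpa = createConsecutivePoolAllocator( backing, blockSize, blockSize * numBlocks );
	void* run = consecutivePoolAllocate( &cpa, 3 ); //3 consecutive blocks, or 0 if none available
	if( run )
	{
		consecutivePoolFree( &cpa, run, 3 );
	}
	destroyConsecutivePoolAllocator( &cpa ); //only resets bookkeeping
	free( backing );                         //actual memory is freed by the caller
}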

driver/ConsecutivePoolAllocator.h

@@ -16,157 +16,11 @@ typedef struct ConsecutivePoolAllocator
unsigned size; //size is exact multiple of block size
} ConsecutivePoolAllocator;
ConsecutivePoolAllocator createConsecutivePoolAllocator(char* b, unsigned bs, unsigned s)
{
assert(b); //only allocated memory
assert(bs >= sizeof(void*)); //we need to be able to store
assert(s%bs==0); //we want a size that is the exact multiple of block size
assert(s > bs); //at least 1 element
ConsecutivePoolAllocator pa =
{
.buf = b,
.nextFreeBlock = (uint32_t*)b,
.blockSize = bs,
.size = s
};
//initialize linked list of free pointers
uint32_t* ptr = pa.nextFreeBlock;
unsigned last = s/bs - 1;
for(unsigned c = 0; c < last; ++c)
{
*ptr = (uint32_t)ptr + bs;
ptr += bs/4;
}
*ptr = 0; //last element
return pa;
}
void destroyConsecutivePoolAllocator(ConsecutivePoolAllocator* pa)
{
//actual memory freeing is done by caller
pa->buf = 0;
pa->nextFreeBlock = 0;
pa->blockSize = 0;
pa->size = 0;
}
//allocate numBlocks consecutive memory
void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks)
{
assert(pa->buf);
if(!pa->nextFreeBlock)
{
return 0; //no free blocks
}
void* ret = 0;
for(uint32_t* candidate = pa->nextFreeBlock; candidate; candidate = (uint32_t*)*candidate)
{
uint32_t found = 1;
uint32_t* prevBlock = candidate;
uint32_t* blockAfterCandidate = (uint32_t*)*candidate;
//check if there are enough consecutive free blocks
for(uint32_t c = 0; c < numBlocks - 1; ++c)
{
if(blockAfterCandidate - prevBlock != pa->blockSize)
{
//signal if not consecutive (ie. diff is greater than blocksize)
found = 0;
break;
}
prevBlock = blockAfterCandidate;
blockAfterCandidate = (uint32_t*)*blockAfterCandidate;
}
//numblocks consecutive blocks found
if(found)
{
ret = candidate;
if(pa->nextFreeBlock == candidate)
{
//candidate found immediately
pa->nextFreeBlock = blockAfterCandidate;
}
else
{
//somewhere the linked list would point to candidate, we need to correct this
for(uint32_t* nextFreeBlockCandidate = pa->nextFreeBlock; nextFreeBlockCandidate; nextFreeBlockCandidate = (uint32_t*)*nextFreeBlockCandidate)
{
if((uint32_t*)*nextFreeBlockCandidate == candidate)
{
*nextFreeBlockCandidate = (uint32_t)blockAfterCandidate;
break;
}
}
}
break;
}
}
return ret;
}
//free numBlocks consecutive memory
void consecutivePoolFree(ConsecutivePoolAllocator* pa, void* p, uint32_t numBlocks)
{
assert(pa->buf);
assert(p);
if((void*)pa->nextFreeBlock > p)
{
for(uint32_t c = 0; c < numBlocks - 1; ++c)
{
//set each allocated block to form a linked list
*(uint32_t*)((char*)p + c * pa->blockSize) = (uint32_t)((char*)p + (c + 1) * pa->blockSize);
}
//set last block to point to the next free
*(uint32_t*)((char*)p + (numBlocks - 1) * pa->blockSize) = (uint32_t)pa->nextFreeBlock;
//set next free to the newly freed block
pa->nextFreeBlock = p;
return;
}
//somewhere the linked list may point after the free block (or null), we need to correct this
for(uint32_t* nextFreeBlockCandidate = pa->nextFreeBlock; nextFreeBlockCandidate; nextFreeBlockCandidate = (uint32_t*)*nextFreeBlockCandidate)
{
if((void*)*nextFreeBlockCandidate > p || !*nextFreeBlockCandidate)
{
for(uint32_t c = 0; c < numBlocks - 1; ++c)
{
//set each allocated block to form a linked list
*(uint32_t*)((char*)p + c * pa->blockSize) = (uint32_t)((char*)p + (c + 1) * pa->blockSize);
}
//set last block to point to the next free
*(uint32_t*)((char*)p + (numBlocks - 1) * pa->blockSize) = *nextFreeBlockCandidate;
*nextFreeBlockCandidate = (uint32_t)p;
break;
}
}
}
//if there's a block free after the current block, it just allocates one more block
//else it frees current block and allocates a new one
void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks)
{
if(pa->nextFreeBlock == (uint32_t*)((char*)currentMem + currNumBlocks * pa->blockSize))
{
//we have one more block after current one, so just expand current
pa->nextFreeBlock = (uint32_t*)*pa->nextFreeBlock;
return currentMem;
}
else
{
void* ret = consecutivePoolAllocate(pa, currNumBlocks + 1);
consecutivePoolFree(pa, currentMem, currNumBlocks);
return ret;
}
}
ConsecutivePoolAllocator createConsecutivePoolAllocator(char* b, unsigned bs, unsigned s);
void destroyConsecutivePoolAllocator(ConsecutivePoolAllocator* pa);
void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks);
void consecutivePoolFree(ConsecutivePoolAllocator* pa, void* p, uint32_t numBlocks);
void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks);
#if defined (__cplusplus)
}

715 driver/ControlListUtil.c Normal file

@@ -0,0 +1,715 @@
#include "ControlListUtil.h"
#include <stdint.h>
uint32_t divRoundUp(uint32_t n, uint32_t d)
{
return (((n) + (d) - 1) / (d));
}
//move bits to offset, mask rest to 0
uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset)
{
return (d << offset) & (~(~0 << bits) << offset);
}
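//for illustration: moveBits(0x3, 2, 4) == 0x30 and moveBits(0xff, 4, 8) == 0xf00,
//i.e. the value is shifted up to the offset and clipped to the requested bit width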
uint32_t clSize(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
return cl->nextFreeByte - cl->buffer;
}
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size)
{
uint32_t currSize = clSize(cl);
if(currSize + size < CONTROL_LIST_SIZE)
{
return 1; //fits!
}
else
{
return 0; //need to reallocate
}
}
void clInit(ControlList* cl, void* buffer)
{
assert(cl);
assert(buffer);
cl->buffer = buffer;
cl->numBlocks = 1;
cl->nextFreeByte = &cl->buffer[0];
}
void clInsertHalt(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_HALT_opcode;
cl->nextFreeByte++;
}
void clInsertNop(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_NOP_opcode;
cl->nextFreeByte++;
}
void clInsertFlush(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLUSH_opcode;
cl->nextFreeByte++;
}
void clInsertFlushAllState(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLUSH_ALL_STATE_opcode;
cl->nextFreeByte++;
}
void clInsertStartTileBinning(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_START_TILE_BINNING_opcode;
cl->nextFreeByte++;
}
void clInsertIncrementSemaphore(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_INCREMENT_SEMAPHORE_opcode;
cl->nextFreeByte++;
}
void clInsertWaitOnSemaphore(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_WAIT_ON_SEMAPHORE_opcode;
cl->nextFreeByte++;
}
//input: 2 cls (cl, handles cl)
void clInsertBranch(ControlList* cls, ControlListAddress address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_BRANCH_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
}
//input: 2 cls (cl, handles cl)
void clInsertBranchToSubList(ControlList* cls, ControlListAddress address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_BRANCH_TO_SUB_LIST_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
}
void clInsertReturnFromSubList(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_RETURN_FROM_SUB_LIST_opcode;
cl->nextFreeByte++;
}
void clInsertStoreMultiSampleResolvedTileColorBuffer(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_opcode;
cl->nextFreeByte++;
}
void clInsertStoreMultiSampleResolvedTileColorBufferAndEOF(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_AND_EOF_opcode;
cl->nextFreeByte++;
}
/*
//input: 2 cls (cl, handles cl)
void clInsertStoreFullResolutionTileBuffer(ControlList* cls,
ControlListAddress address,
uint32_t lastTile, //0/1
uint32_t disableClearOnWrite, //0/1
uint32_t disableZStencilBufferWrite, //0/1
uint32_t disableColorBufferWrite) //0/1
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_STORE_FULL_RESOLUTION_TILE_BUFFER_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte =
moveBits(disableColorBufferWrite, 1, 0) |
moveBits(disableZStencilBufferWrite, 1, 1) |
moveBits(disableClearOnWrite, 1, 2) |
moveBits(lastTile, 1, 3) |
moveBits(address.offset, 28, 4);
cls->nextFreeByte += 4;
}
*/
/*
//input: 2 cls (cl, handles cl)
void clInsertReLoadFullResolutionTileBuffer(ControlList* cls,
ControlListAddress address,
uint32_t disableZStencilBufferRead, //0/1
uint32_t disableColorBufferRead) //0/1
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_RE_LOAD_FULL_RESOLUTION_TILE_BUFFER_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte =
moveBits(disableColorBufferRead, 1, 0) |
moveBits(disableZStencilBufferRead, 1, 1) |
moveBits(address.offset, 28, 4);
cls->nextFreeByte += 4;
}
*/
/*
//input: 2 cls (cl, handles cl)
void clInsertStoreTileBufferGeneral(ControlList* cls,
ControlListAddress address,
uint32_t lastTileOfFrame, //0/1
uint32_t disableZStencilBufferDump, //0/1
uint32_t disableColorBufferDump, //0/1
uint32_t disableZStencilBufferClearOnStoreDump, //0/1
uint32_t disableColorBufferClearOnStoreDump, //0/1
uint32_t disableDoubleBufferSwap, //0/1
uint32_t pixelColorFormat, //0/1/2 RGBA8/BGR565dither/BGR565nodither
uint32_t mode, //0/1/2 sample0/decimate4x/decimate16x
uint32_t format, //0/1/2 raster/t/lt
uint32_t bufferToStore) //0/1/2/3/5 none/color/zstencil/z/full
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_STORE_TILE_BUFFER_GENERAL_opcode; cls->nextFreeByte++;
//TODO is this correct?
*cls->nextFreeByte =
moveBits(bufferToStore, 3, 0) |
moveBits(format, 2, 4) |
moveBits(mode, 2, 6);
cls->nextFreeByte++;
*cls->nextFreeByte =
moveBits(pixelColorFormat, 2, 0) |
moveBits(disableDoubleBufferSwap, 1, 4) |
moveBits(disableColorBufferClearOnStoreDump, 1, 5) |
moveBits(disableZStencilBufferClearOnStoreDump, 1, 6) |
moveBits(1, 1, 7); //disable vg mask
cls->nextFreeByte++;
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte =
moveBits(disableColorBufferDump, 1, 0) |
moveBits(disableZStencilBufferDump, 1, 1) |
moveBits(1, 1, 2) | //disable vg mask
moveBits(lastTileOfFrame, 1, 3) |
moveBits(address.offset, 28, 4);
cls->nextFreeByte += 4;
}
*/
/*
//input: 2 cls (cl, handles cl)
void clInsertLoadTileBufferGeneral(ControlList* cls,
ControlListAddress address,
uint32_t disableZStencilBufferLoad, //0/1
uint32_t disableColorBufferLoad, //0/1
uint32_t pixelColorFormat, //0/1/2 RGBA8/BGR565dither/BGR565nodither
uint32_t mode, //0/1/2 sample0/decimate4x/decimate16x
uint32_t format, //0/1/2 raster/t/lt
uint32_t bufferToLoad) //0/1/2/3/5 none/color/zstencil/z/full
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_LOAD_TILE_BUFFER_GENERAL_opcode; cls->nextFreeByte++;
//TODO is this correct?
*cls->nextFreeByte =
moveBits(bufferToLoad, 3, 0) |
moveBits(format, 2, 4);
cls->nextFreeByte++;
*cls->nextFreeByte =
moveBits(pixelColorFormat, 2, 0);
cls->nextFreeByte++;
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte =
moveBits(disableColorBufferLoad, 1, 0) |
moveBits(disableZStencilBufferLoad, 1, 1) |
moveBits(1, 1, 2) | //disable vg mask
moveBits(address.offset, 28, 4);
cls->nextFreeByte += 4;
}
*/
void clInsertIndexedPrimitiveList(ControlList* cl,
uint32_t maxIndex,
uint32_t indicesAddress,
uint32_t length,
uint32_t indexType, //0/1: 8 or 16 bit
enum V3D21_Primitive primitiveMode)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_INDEXED_PRIMITIVE_LIST_opcode; cl->nextFreeByte++;
*cl->nextFreeByte = moveBits(indexType, 4, 4) | moveBits(primitiveMode, 4, 0); cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = length; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = indicesAddress; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = maxIndex; cl->nextFreeByte += 4;
}
void clInsertVertexArrayPrimitives(ControlList* cl,
uint32_t firstVertexIndex,
uint32_t length,
enum V3D21_Primitive primitiveMode)
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_VERTEX_ARRAY_PRIMITIVES_opcode; cl->nextFreeByte++;
*cl->nextFreeByte = moveBits(primitiveMode, 8, 0); cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = length; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = firstVertexIndex; cl->nextFreeByte += 4;
}
void clInsertPrimitiveListFormat(ControlList* cl,
uint32_t dataType, //1/3: 16 or 32 bit
uint32_t primitiveType) //0/1/2/3: point/line/tri/rhy
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_PRIMITIVE_LIST_FORMAT_opcode; cl->nextFreeByte++;
*cl->nextFreeByte = moveBits(dataType, 4, 4) | moveBits(primitiveType, 4, 0); cl->nextFreeByte++;
}
void clInsertShaderState(ControlList* cl,
uint32_t address,
uint32_t extendedShaderRecord, //0/1: true/false
uint32_t numberOfAttributeArrays)
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_GL_SHADER_STATE_opcode; cl->nextFreeByte++;
//TODO is this correct?
*(uint32_t*)cl->nextFreeByte =
moveBits(address, 28, 4) |
moveBits(extendedShaderRecord, 1, 3) |
moveBits(numberOfAttributeArrays, 3, 0); cl->nextFreeByte += 4;
}
/*
void clInsertClearColors(ControlList* cl,
uint32_t clearStencil,
uint32_t clearZ, //24 bit Z
uint64_t clearColor) //2x RGBA8 or 1x RGBA16
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLEAR_COLORS_opcode; cl->nextFreeByte++;
*(uint64_t*)cl->nextFreeByte = clearColor; cl->nextFreeByte += 8;
*(uint32_t*)cl->nextFreeByte = clearZ; cl->nextFreeByte += 4; //24 bits for Z, 8 bit for vg mask (unused)
*cl->nextFreeByte = clearStencil; cl->nextFreeByte++;
}
*/
void clInsertConfigurationBits(ControlList* cl,
uint32_t earlyZUpdatesEnable, //0/1
uint32_t earlyZEnable, //0/1
uint32_t zUpdatesEnable, //0/1
enum V3D21_Compare_Function depthTestFunction,
uint32_t coverageReadMode, //0/1 clear/leave as is
uint32_t coveragePipeSelect, //0/1
uint32_t coverageUpdateMode, //0/1/2/3 nonzero, odd, or, zero
uint32_t coverageReadType, //0/1 4*8bit, 16 bit mask
uint32_t rasterizerOversampleMode, //0/1/2 none, 4x, 16x
uint32_t enableDepthOffset, //0/1
uint32_t clockwisePrimitives, //0/1
uint32_t enableReverseFacingPrimitive, //0/1
uint32_t enableForwardFacingPrimitive) //0/1
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CONFIGURATION_BITS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte =
moveBits(enableForwardFacingPrimitive, 1, 0) |
moveBits(enableReverseFacingPrimitive, 1, 1) |
moveBits(clockwisePrimitives, 1, 2) |
moveBits(enableDepthOffset, 1, 3) |
moveBits(coverageReadType, 1, 5) |
moveBits(rasterizerOversampleMode, 2, 6) |
moveBits(coveragePipeSelect, 1, 8) |
moveBits(coverageUpdateMode, 2, 9) |
moveBits(coverageReadMode, 1, 11) |
moveBits(depthTestFunction, 3, 12) |
moveBits(zUpdatesEnable, 1, 15) |
moveBits(earlyZEnable, 1, 16) |
moveBits(earlyZUpdatesEnable, 1, 17); cl->nextFreeByte += 4;
}
void clInsertFlatShadeFlags(ControlList* cl,
uint32_t flags)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLAT_SHADE_FLAGS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = flags; cl->nextFreeByte += 4;
}
void clInsertPointSize(ControlList* cl,
float size)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_POINT_SIZE_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = size; cl->nextFreeByte += 4;
}
void clInsertLineWidth(ControlList* cl,
float width)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_LINE_WIDTH_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = width; cl->nextFreeByte += 4;
}
void clInsertRHTXBoundary(ControlList* cl,
uint32_t boundary) //sint16
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_RHT_X_BOUNDARY_opcode; cl->nextFreeByte++;
*(uint16_t*)cl->nextFreeByte = moveBits(boundary, 16, 0); cl->nextFreeByte += 2;
}
void clInsertDepthOffset(ControlList* cl,
uint32_t units, //float 187
uint32_t factor) //float 187
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_DEPTH_OFFSET_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = moveBits(factor, 16, 0) | moveBits(units, 16, 16); cl->nextFreeByte += 4;
}
void clInsertClipWindow(ControlList* cl,
uint32_t width, //uint16
uint32_t height, //uint16
uint32_t bottomPixelCoord, //uint16
uint32_t leftPixelCoord) //uint16
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIP_WINDOW_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = moveBits(leftPixelCoord, 16, 0) | moveBits(bottomPixelCoord, 16, 16); cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = moveBits(width, 16, 0) | moveBits(height, 16, 16); cl->nextFreeByte += 4;
}
void clInsertViewPortOffset(ControlList* cl,
uint32_t x, //sint16
uint32_t y //sint16
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_VIEWPORT_OFFSET_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = moveBits(x, 16, 0) | moveBits(y, 16, 16); cl->nextFreeByte += 4;
}
void clInsertZMinMaxClippingPlanes(ControlList* cl,
float minZw,
float maxZw
)
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_Z_MIN_AND_MAX_CLIPPING_PLANES_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = minZw; cl->nextFreeByte += 4;
*(float*)cl->nextFreeByte = maxZw; cl->nextFreeByte += 4;
}
void clInsertClipperXYScaling(ControlList* cl,
float width, //half height in 1/16 of pixel
float height //half width in 1/16 of pixel
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIPPER_XY_SCALING_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = width; cl->nextFreeByte += 4;
*(float*)cl->nextFreeByte = height; cl->nextFreeByte += 4;
}
void clInsertClipperZScaleOffset(ControlList* cl,
float zOffset, //zc to zs
float zScale //zc to zs
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIPPER_Z_SCALE_AND_OFFSET_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = zScale; cl->nextFreeByte += 4;
*(float*)cl->nextFreeByte = zOffset; cl->nextFreeByte += 4;
}
void clInsertTileBinningModeConfiguration(ControlList* cl,
uint32_t doubleBufferInNonMsMode, //0/1
uint32_t tileAllocationBlockSize, //0/1/2/3 32/64/128/256 bytes
uint32_t tileAllocationInitialBlockSize, //0/1/2/3 32/64/128/256 bytes
uint32_t autoInitializeTileStateDataArray, //0/1
uint32_t tileBuffer64BitColorDepth, //0/1
uint32_t multisampleMode4x, //0/1
uint32_t widthInPixels,
uint32_t heightInPixels,
uint32_t tileStateDataArrayAddress, //16 byte aligned, size of 48 bytes * num tiles
uint32_t tileAllocationMemorySize,
uint32_t tileAllocationMemoryAddress
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_TILE_BINNING_MODE_CONFIGURATION_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = tileAllocationMemoryAddress; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = tileAllocationMemorySize; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = tileStateDataArrayAddress; cl->nextFreeByte += 4;
uint32_t tileSizeW = 64;
uint32_t tileSizeH = 64;
if(multisampleMode4x)
{
tileSizeW >>= 1;
tileSizeH >>= 1;
}
if(tileBuffer64BitColorDepth)
{
tileSizeH >>= 1;
}
uint32_t widthInTiles = divRoundUp(widthInPixels, tileSizeW);
uint32_t heightInTiles = divRoundUp(heightInPixels, tileSizeH);
*(uint8_t*)cl->nextFreeByte = widthInTiles; cl->nextFreeByte++;
*(uint8_t*)cl->nextFreeByte = heightInTiles; cl->nextFreeByte++;
*cl->nextFreeByte =
moveBits(multisampleMode4x, 1, 0) |
moveBits(tileBuffer64BitColorDepth, 1, 1) |
moveBits(autoInitializeTileStateDataArray, 1, 2) |
moveBits(tileAllocationInitialBlockSize, 2, 3) |
moveBits(tileAllocationBlockSize, 2, 5) |
moveBits(doubleBufferInNonMsMode, 1, 7); cl->nextFreeByte++;
}
/*
void clInsertTileRenderingModeConfiguration(ControlList* cls,
ControlListAddress address,
uint32_t doubleBufferInNonMsMode, //0/1
uint32_t earlyZEarlyCovDisable, //0/1
uint32_t earlyZUpdateDirection, //0/1 lt,le/gt,ge
uint32_t selectCoverageMode, //0/1
uint32_t memoryFormat, //0/1/2 linear/t/lt
uint32_t decimateMode, //0/1/2 0x/4x/16x
uint32_t nonHDRFrameFormatColorFormat, //0/1/2 bgr565dithered/rgba8/bgr565nodither
uint32_t tileBufferHDRMode, //0/1
uint32_t multisampleMode4x, //0/1
uint32_t widthPixels,
uint32_t heightPixels)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_TILE_RENDERING_MODE_CONFIGURATION_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
*(uint32_t*)cls->nextFreeByte = moveBits(widthPixels, 16, 0) | moveBits(heightPixels, 16, 16); cls->nextFreeByte += 4;
*(uint16_t*)cls->nextFreeByte =
moveBits(multisampleMode4x, 1, 0) |
moveBits(tileBufferHDRMode, 1, 1) |
moveBits(nonHDRFrameFormatColorFormat, 2, 2) |
moveBits(decimateMode, 2, 4) |
moveBits(memoryFormat, 2, 6) |
moveBits(0, 1, 8) | //vg buffer enable
moveBits(selectCoverageMode, 1, 9) |
moveBits(earlyZUpdateDirection, 1, 10) |
moveBits(earlyZEarlyCovDisable, 1, 11) |
moveBits(doubleBufferInNonMsMode, 1, 12); cls->nextFreeByte += 2;
}
*/
/*
void clInsertTileCoordinates(ControlList* cl,
uint32_t tileColumnNumber, //int8
uint32_t tileRowNumber) //int8
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_TILE_COORDINATES_opcode; cl->nextFreeByte++;
*(uint16_t*)cl->nextFreeByte = moveBits(tileColumnNumber, 8, 0) | moveBits(tileRowNumber, 8, 8); cl->nextFreeByte += 2;
}
*/
void clInsertGEMRelocations(ControlList* cl,
uint32_t buffer0,
uint32_t buffer1)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_GEM_RELOCATIONS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = buffer0; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = buffer1; cl->nextFreeByte += 4;
}
//input: 2 cls (cl, handles cl)
void clInsertShaderRecord(ControlList* cls,
uint32_t fragmentShaderIsSingleThreaded, //0/1
uint32_t pointSizeIncludedInShadedVertexData, //0/1
uint32_t enableClipping, //0/1
uint32_t fragmentNumberOfUnusedUniforms,
uint32_t fragmentNumberOfVaryings,
uint32_t fragmentUniformsAddress,
ControlListAddress fragmentCodeAddress,
uint32_t vertexNumberOfUnusedUniforms,
uint32_t vertexAttributeArraySelectBits,
uint32_t vertexTotalAttributesSize,
uint32_t vertexUniformsAddress,
ControlListAddress vertexCodeAddress)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
//TODO is this correct?
*cls->nextFreeByte =
moveBits(fragmentShaderIsSingleThreaded, 1, 0) |
moveBits(pointSizeIncludedInShadedVertexData, 1, 1) |
moveBits(enableClipping, 1, 2); cls->nextFreeByte++;
*cls->nextFreeByte = 0; cls->nextFreeByte++;
*(uint16_t*)cls->nextFreeByte = moveBits(fragmentNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2;
*cls->nextFreeByte = fragmentNumberOfVaryings; cls->nextFreeByte++;
clEmitShaderRelocation(cls, &fragmentCodeAddress);
*(uint32_t*)cls->nextFreeByte = fragmentCodeAddress.offset; cls->nextFreeByte += 4;
*(uint32_t*)cls->nextFreeByte = fragmentUniformsAddress; cls->nextFreeByte += 4;
*(uint16_t*)cls->nextFreeByte = moveBits(vertexNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2;
*cls->nextFreeByte = vertexAttributeArraySelectBits; cls->nextFreeByte++;
*cls->nextFreeByte = vertexTotalAttributesSize; cls->nextFreeByte++;
clEmitShaderRelocation(cls, &vertexCodeAddress);
*(uint32_t*)cls->nextFreeByte = moveBits(vertexCodeAddress.offset, 32, 0) | moveBits(vertexUniformsAddress, 32, 0); cls->nextFreeByte += 4; //???
cls->nextFreeByte += 4;
//skip coordinate shader stuff
cls->nextFreeByte += 16;
}
//input: 2 cls (cl, handles cl)
void clInsertAttributeRecord(ControlList* cls,
ControlListAddress address,
uint32_t sizeBytes,
uint32_t stride,
uint32_t vertexVPMOffset)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
uint32_t sizeBytesMinusOne = sizeBytes - 1;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
*cls->nextFreeByte = sizeBytesMinusOne; cls->nextFreeByte++;
*cls->nextFreeByte = stride; cls->nextFreeByte++;
*cls->nextFreeByte = vertexVPMOffset; cls->nextFreeByte++;
cls->nextFreeByte++; //skip coordinate shader stuff
}
uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handle)
{
uint32_t c = 0;
uint32_t numHandles = clSize(handlesCl) / 4;
for(; c < numHandles; ++c)
{
if(((uint32_t*)handlesCl->buffer)[c] == handle)
{
//found
return c;
}
}
//write handle to handles cl
*(uint32_t*)handlesCl->nextFreeByte = handle;
handlesCl->nextFreeByte += 4;
return c;
}
//input: 2 cls (cl + handles cl)
inline void clEmitShaderRelocation(ControlList* cls, const ControlListAddress* address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
assert(address);
assert(address->handle);
//search for handle in handles cl
//if found insert handle index
ControlList* cl = cls;
ControlList* handlesCl = cls + 1;
//store offset within handles in cl
*(uint32_t*)cl->nextFreeByte = clGetHandleIndex(handlesCl, address->handle);
cl->nextFreeByte += 4;
}
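As a rough sketch of how these emit helpers chain together (buffer management is simplified here; in the driver the backing memory comes from the command buffer, and it is assumed to be at least CONTROL_LIST_SIZE bytes so that clHasEnoughSpace is meaningful):

#include "ControlListUtil.h"

void controlListExample(void* backing) //backing: at least CONTROL_LIST_SIZE bytes
{
	ControlList cl;
	clInit( &cl, backing );
	if( clHasEnoughSpace( &cl, 3 ) ) //the three opcodes below take 1 byte each
	{
		clInsertStartTileBinning( &cl );
		clInsertFlush( &cl );
		clInsertHalt( &cl );
	}
	//clSize(&cl) now reports the number of bytes emitted so far
}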

driver/ControlListUtil.h

@@ -21,7 +21,7 @@ typedef struct ControlList
uint8_t* nextFreeByte; //pointer to the next available free byte
} ControlList;
static inline void clEmitShaderRelocation(ControlList* cl, const ControlListAddress* address);
void clEmitShaderRelocation(ControlList* cl, const ControlListAddress* address);
#define __gen_user_data struct ControlList
#define __gen_address_type ControlListAddress
@@ -30,356 +30,40 @@ static inline void clEmitShaderRelocation(ControlList* cl, const ControlListAddr
#include "brcm/cle/v3d_packet_v21_pack.h"
uint32_t divRoundUp(uint32_t n, uint32_t d)
{
return (((n) + (d) - 1) / (d));
}
//move bits to offset, mask rest to 0
uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset)
{
return (d << offset) & (~(~0 << bits) << offset);
}
uint32_t clSize(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
return cl->nextFreeByte - cl->buffer;
}
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size)
{
uint32_t currSize = clSize(cl);
if(currSize + size < CONTROL_LIST_SIZE)
{
return 1; //fits!
}
else
{
return 0; //need to reallocate
}
}
void clInit(ControlList* cl, void* buffer)
{
assert(cl);
assert(buffer);
cl->buffer = buffer;
cl->numBlocks = 1;
cl->nextFreeByte = &cl->buffer[0];
}
void clInsertHalt(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_HALT_opcode;
cl->nextFreeByte++;
}
void clInsertNop(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_NOP_opcode;
cl->nextFreeByte++;
}
void clInsertFlush(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLUSH_opcode;
cl->nextFreeByte++;
}
void clInsertFlushAllState(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLUSH_ALL_STATE_opcode;
cl->nextFreeByte++;
}
void clInsertStartTileBinning(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_START_TILE_BINNING_opcode;
cl->nextFreeByte++;
}
void clInsertIncrementSemaphore(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_INCREMENT_SEMAPHORE_opcode;
cl->nextFreeByte++;
}
void clInsertWaitOnSemaphore(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_WAIT_ON_SEMAPHORE_opcode;
cl->nextFreeByte++;
}
//input: 2 cls (cl, handles cl)
void clInsertBranch(ControlList* cls, ControlListAddress address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_BRANCH_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
}
//input: 2 cls (cl, handles cl)
void clInsertBranchToSubList(ControlList* cls, ControlListAddress address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_BRANCH_TO_SUB_LIST_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
}
void clInsertReturnFromSubList(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_RETURN_FROM_SUB_LIST_opcode;
cl->nextFreeByte++;
}
void clInsertStoreMultiSampleResolvedTileColorBuffer(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_opcode;
cl->nextFreeByte++;
}
void clInsertStoreMultiSampleResolvedTileColorBufferAndEOF(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_AND_EOF_opcode;
cl->nextFreeByte++;
}
/*
//input: 2 cls (cl, handles cl)
void clInsertStoreFullResolutionTileBuffer(ControlList* cls,
ControlListAddress address,
uint32_t lastTile, //0/1
uint32_t disableClearOnWrite, //0/1
uint32_t disableZStencilBufferWrite, //0/1
uint32_t disableColorBufferWrite) //0/1
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_STORE_FULL_RESOLUTION_TILE_BUFFER_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte =
moveBits(disableColorBufferWrite, 1, 0) |
moveBits(disableZStencilBufferWrite, 1, 1) |
moveBits(disableClearOnWrite, 1, 2) |
moveBits(lastTile, 1, 3) |
moveBits(address.offset, 28, 4);
cls->nextFreeByte += 4;
}
*/
/*
//input: 2 cls (cl, handles cl)
void clInsertReLoadFullResolutionTileBuffer(ControlList* cls,
ControlListAddress address,
uint32_t disableZStencilBufferRead, //0/1
uint32_t disableColorBufferRead) //0/1
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_RE_LOAD_FULL_RESOLUTION_TILE_BUFFER_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte =
moveBits(disableColorBufferRead, 1, 0) |
moveBits(disableZStencilBufferRead, 1, 1) |
moveBits(address.offset, 28, 4);
cls->nextFreeByte += 4;
}
*/
/*
//input: 2 cls (cl, handles cl)
void clInsertStoreTileBufferGeneral(ControlList* cls,
ControlListAddress address,
uint32_t lastTileOfFrame, //0/1
uint32_t disableZStencilBufferDump, //0/1
uint32_t disableColorBufferDump, //0/1
uint32_t disableZStencilBufferClearOnStoreDump, //0/1
uint32_t disableColorBufferClearOnStoreDump, //0/1
uint32_t disableDoubleBufferSwap, //0/1
uint32_t pixelColorFormat, //0/1/2 RGBA8/BGR565dither/BGR565nodither
uint32_t mode, //0/1/2 sample0/decimate4x/decimate16x
uint32_t format, //0/1/2 raster/t/lt
uint32_t bufferToStore) //0/1/2/3/5 none/color/zstencil/z/full
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_STORE_TILE_BUFFER_GENERAL_opcode; cls->nextFreeByte++;
//TODO is this correct?
*cls->nextFreeByte =
moveBits(bufferToStore, 3, 0) |
moveBits(format, 2, 4) |
moveBits(mode, 2, 6);
cls->nextFreeByte++;
*cls->nextFreeByte =
moveBits(pixelColorFormat, 2, 0) |
moveBits(disableDoubleBufferSwap, 1, 4) |
moveBits(disableColorBufferClearOnStoreDump, 1, 5) |
moveBits(disableZStencilBufferClearOnStoreDump, 1, 6) |
moveBits(1, 1, 7); //disable vg mask
cls->nextFreeByte++;
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte =
moveBits(disableColorBufferDump, 1, 0) |
moveBits(disableZStencilBufferDump, 1, 1) |
moveBits(1, 1, 2) | //disable vg mask
moveBits(lastTileOfFrame, 1, 3) |
moveBits(address.offset, 28, 4);
cls->nextFreeByte += 4;
}
*/
/*
//input: 2 cls (cl, handles cl)
void clInsertLoadTileBufferGeneral(ControlList* cls,
ControlListAddress address,
uint32_t disableZStencilBufferLoad, //0/1
uint32_t disableColorBufferLoad, //0/1
uint32_t pixelColorFormat, //0/1/2 RGBA8/BGR565dither/BGR565nodither
uint32_t mode, //0/1/2 sample0/decimate4x/decimate16x
uint32_t format, //0/1/2 raster/t/lt
uint32_t bufferToLoad) //0/1/2/3/5 none/color/zstencil/z/full
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_LOAD_TILE_BUFFER_GENERAL_opcode; cls->nextFreeByte++;
//TODO is this correct?
*cls->nextFreeByte =
moveBits(bufferToLoad, 3, 0) |
moveBits(format, 2, 4);
cls->nextFreeByte++;
*cls->nextFreeByte =
moveBits(pixelColorFormat, 2, 0);
cls->nextFreeByte++;
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte =
moveBits(disableColorBufferLoad, 1, 0) |
moveBits(disableZStencilBufferLoad, 1, 1) |
moveBits(1, 1, 2) | //disable vg mask
moveBits(address.offset, 28, 4);
cls->nextFreeByte += 4;
}
*/
uint32_t divRoundUp(uint32_t n, uint32_t d);
uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset);
uint32_t clSize(ControlList* cl);
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size);
void clInit(ControlList* cl, void* buffer);
void clInsertHalt(ControlList* cl);
void clInsertNop(ControlList* cl);
void clInsertFlush(ControlList* cl);
void clInsertFlushAllState(ControlList* cl);
void clInsertStartTileBinning(ControlList* cl);
void clInsertIncrementSemaphore(ControlList* cl);
void clInsertWaitOnSemaphore(ControlList* cl);
void clInsertBranch(ControlList* cls, ControlListAddress address);
void clInsertBranchToSubList(ControlList* cls, ControlListAddress address);
void clInsertReturnFromSubList(ControlList* cl);
void clInsertStoreMultiSampleResolvedTileColorBuffer(ControlList* cl);
void clInsertStoreMultiSampleResolvedTileColorBufferAndEOF(ControlList* cl);
void clInsertIndexedPrimitiveList(ControlList* cl,
uint32_t maxIndex,
uint32_t indicesAddress,
uint32_t length,
uint32_t indexType, //0/1: 8 or 16 bit
enum V3D21_Primitive primitiveMode)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_INDEXED_PRIMITIVE_LIST_opcode; cl->nextFreeByte++;
*cl->nextFreeByte = moveBits(indexType, 4, 4) | moveBits(primitiveMode, 4, 0); cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = length; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = indicesAddress; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = maxIndex; cl->nextFreeByte += 4;
}
enum V3D21_Primitive primitiveMode);
void clInsertVertexArrayPrimitives(ControlList* cl,
uint32_t firstVertexIndex,
uint32_t length,
enum V3D21_Primitive primitiveMode)
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_VERTEX_ARRAY_PRIMITIVES_opcode; cl->nextFreeByte++;
*cl->nextFreeByte = moveBits(primitiveMode, 8, 0); cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = length; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = firstVertexIndex; cl->nextFreeByte += 4;
}
enum V3D21_Primitive primitiveMode);
void clInsertPrimitiveListFormat(ControlList* cl,
uint32_t dataType, //1/3: 16 or 32 bit
uint32_t primitiveType) //0/1/2/3: point/line/tri/rhy
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_PRIMITIVE_LIST_FORMAT_opcode; cl->nextFreeByte++;
*cl->nextFreeByte = moveBits(dataType, 4, 4) | moveBits(primitiveType, 4, 0); cl->nextFreeByte++;
}
uint32_t primitiveType); //0/1/2/3: point/line/tri/rhy
void clInsertShaderState(ControlList* cl,
uint32_t address,
uint32_t extendedShaderRecord, //0/1: true/false
uint32_t numberOfAttributeArrays)
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_GL_SHADER_STATE_opcode; cl->nextFreeByte++;
//TODO is this correct?
*(uint32_t*)cl->nextFreeByte =
moveBits(address, 28, 4) |
moveBits(extendedShaderRecord, 1, 3) |
moveBits(numberOfAttributeArrays, 3, 0); cl->nextFreeByte += 4;
}
/*
void clInsertClearColors(ControlList* cl,
uint32_t clearStencil,
uint32_t clearZ, //24 bit Z
uint64_t clearColor) //2x RGBA8 or 1x RGBA16
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLEAR_COLORS_opcode; cl->nextFreeByte++;
*(uint64_t*)cl->nextFreeByte = clearColor; cl->nextFreeByte += 8;
*(uint32_t*)cl->nextFreeByte = clearZ; cl->nextFreeByte += 4; //24 bits for Z, 8 bit for vg mask (unused)
*cl->nextFreeByte = clearStencil; cl->nextFreeByte++;
}
*/
uint32_t numberOfAttributeArrays);
void clInsertConfigurationBits(ControlList* cl,
uint32_t earlyZUpdatesEnable, //0/1
uint32_t earlyZEnable, //0/1
@@ -393,142 +77,39 @@ void clInsertConfigurationBits(ControlList* cl,
uint32_t enableDepthOffset, //0/1
uint32_t clockwisePrimitives, //0/1
uint32_t enableReverseFacingPrimitive, //0/1
uint32_t enableForwardFacingPrimitive) //0/1
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CONFIGURATION_BITS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte =
moveBits(enableForwardFacingPrimitive, 1, 0) |
moveBits(enableReverseFacingPrimitive, 1, 1) |
moveBits(clockwisePrimitives, 1, 2) |
moveBits(enableDepthOffset, 1, 3) |
moveBits(coverageReadType, 1, 5) |
moveBits(rasterizerOversampleMode, 2, 6) |
moveBits(coveragePipeSelect, 1, 8) |
moveBits(coverageUpdateMode, 2, 9) |
moveBits(coverageReadMode, 1, 11) |
moveBits(depthTestFunction, 3, 12) |
moveBits(zUpdatesEnable, 1, 15) |
moveBits(earlyZEnable, 1, 16) |
moveBits(earlyZUpdatesEnable, 1, 17); cl->nextFreeByte += 4;
}
uint32_t enableForwardFacingPrimitive); //0/1
void clInsertFlatShadeFlags(ControlList* cl,
uint32_t flags)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLAT_SHADE_FLAGS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = flags; cl->nextFreeByte += 4;
}
uint32_t flags);
void clInsertPointSize(ControlList* cl,
float size)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_POINT_SIZE_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = size; cl->nextFreeByte += 4;
}
float size);
void clInsertLineWidth(ControlList* cl,
float width)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_LINE_WIDTH_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = width; cl->nextFreeByte += 4;
}
float width);
void clInsertRHTXBoundary(ControlList* cl,
uint32_t boundary) //sint16
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_RHT_X_BOUNDARY_opcode; cl->nextFreeByte++;
*(uint16_t*)cl->nextFreeByte = moveBits(boundary, 16, 0); cl->nextFreeByte += 2;
}
uint32_t boundary); //sint16
void clInsertDepthOffset(ControlList* cl,
uint32_t units, //float 187
uint32_t factor) //float 187
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_DEPTH_OFFSET_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = moveBits(factor, 16, 0) | moveBits(units, 16, 16); cl->nextFreeByte += 4;
}
uint32_t factor); //float 187
void clInsertClipWindow(ControlList* cl,
uint32_t width, //uint16
uint32_t height, //uint16
uint32_t bottomPixelCoord, //uint16
uint32_t leftPixelCoord) //uint16
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIP_WINDOW_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = moveBits(leftPixelCoord, 16, 0) | moveBits(bottomPixelCoord, 16, 16); cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = moveBits(width, 16, 0) | moveBits(height, 16, 16); cl->nextFreeByte += 4;
}
uint32_t leftPixelCoord); //uint16
void clInsertViewPortOffset(ControlList* cl,
uint32_t x, //sint16
uint32_t y //sint16
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_VIEWPORT_OFFSET_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = moveBits(x, 16, 0) | moveBits(y, 16, 16); cl->nextFreeByte += 4;
}
);
void clInsertZMinMaxClippingPlanes(ControlList* cl,
float minZw,
float maxZw
)
{
assert(cl);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_Z_MIN_AND_MAX_CLIPPING_PLANES_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = minZw; cl->nextFreeByte += 4;
*(float*)cl->nextFreeByte = maxZw; cl->nextFreeByte += 4;
}
);
void clInsertClipperXYScaling(ControlList* cl,
float width, //half height in 1/16 of pixel
float height //half width in 1/16 of pixel
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIPPER_XY_SCALING_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = width; cl->nextFreeByte += 4;
*(float*)cl->nextFreeByte = height; cl->nextFreeByte += 4;
}
);
void clInsertClipperZScaleOffset(ControlList* cl,
float zOffset, //zc to zs
float zScale //zc to zs
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIPPER_Z_SCALE_AND_OFFSET_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = zScale; cl->nextFreeByte += 4;
*(float*)cl->nextFreeByte = zOffset; cl->nextFreeByte += 4;
}
);
void clInsertTileBinningModeConfiguration(ControlList* cl,
uint32_t doubleBufferInNonMsMode, //0/1
uint32_t tileAllocationBlockSize, //0/1/2/3 32/64/128/256 bytes
@@ -541,105 +122,10 @@ void clInsertTileBinningModeConfiguration(ControlList* cl,
uint32_t tileStateDataArrayAddress, //16 byte aligned, size of 48 bytes * num tiles
uint32_t tileAllocationMemorySize,
uint32_t tileAllocationMemoryAddress
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_TILE_BINNING_MODE_CONFIGURATION_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = tileAllocationMemoryAddress; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = tileAllocationMemorySize; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = tileStateDataArrayAddress; cl->nextFreeByte += 4;
uint32_t tileSizeW = 64;
uint32_t tileSizeH = 64;
if(multisampleMode4x)
{
tileSizeW >>= 1;
tileSizeH >>= 1;
}
if(tileBuffer64BitColorDepth)
{
tileSizeH >>= 1;
}
uint32_t widthInTiles = divRoundUp(widthInPixels, tileSizeW);
uint32_t heightInTiles = divRoundUp(heightInPixels, tileSizeH);
*(uint8_t*)cl->nextFreeByte = widthInTiles; cl->nextFreeByte++;
*(uint8_t*)cl->nextFreeByte = heightInTiles; cl->nextFreeByte++;
*cl->nextFreeByte =
moveBits(multisampleMode4x, 1, 0) |
moveBits(tileBuffer64BitColorDepth, 1, 1) |
moveBits(autoInitializeTileStateDataArray, 1, 2) |
moveBits(tileAllocationInitialBlockSize, 2, 3) |
moveBits(tileAllocationBlockSize, 2, 5) |
moveBits(doubleBufferInNonMsMode, 1, 7); cl->nextFreeByte++;
}
/*
void clInsertTileRenderingModeConfiguration(ControlList* cls,
ControlListAddress address,
uint32_t doubleBufferInNonMsMode, //0/1
uint32_t earlyZEarlyCovDisable, //0/1
uint32_t earlyZUpdateDirection, //0/1 lt,le/gt,ge
uint32_t selectCoverageMode, //0/1
uint32_t memoryFormat, //0/1/2 linear/t/lt
uint32_t decimateMode, //0/1/2 0x/4x/16x
uint32_t nonHDRFrameFormatColorFormat, //0/1/2 bgr565dithered/rgba8/bgr565nodither
uint32_t tileBufferHDRMode, //0/1
uint32_t multisampleMode4x, //0/1
uint32_t widthPixels,
uint32_t heightPixels)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_TILE_RENDERING_MODE_CONFIGURATION_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
*(uint32_t*)cls->nextFreeByte = moveBits(widthPixels, 16, 0) | moveBits(heightPixels, 16, 16); cls->nextFreeByte += 4;
*(uint16_t*)cls->nextFreeByte =
moveBits(multisampleMode4x, 1, 0) |
moveBits(tileBufferHDRMode, 1, 1) |
moveBits(nonHDRFrameFormatColorFormat, 2, 2) |
moveBits(decimateMode, 2, 4) |
moveBits(memoryFormat, 2, 6) |
moveBits(0, 1, 8) | //vg buffer enable
moveBits(selectCoverageMode, 1, 9) |
moveBits(earlyZUpdateDirection, 1, 10) |
moveBits(earlyZEarlyCovDisable, 1, 11) |
moveBits(doubleBufferInNonMsMode, 1, 12); cls->nextFreeByte += 2;
}
*/
/*
void clInsertTileCoordinates(ControlList* cl,
uint32_t tileColumnNumber, //int8
uint32_t tileRowNumber) //int8
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_TILE_COORDINATES_opcode; cl->nextFreeByte++;
*(uint16_t*)cl->nextFreeByte = moveBits(tileColumnNumber, 8, 0) | moveBits(tileRowNumber, 8, 8); cl->nextFreeByte += 2;
}
*/
);
void clInsertGEMRelocations(ControlList* cl,
uint32_t buffer0,
uint32_t buffer1)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_GEM_RELOCATIONS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = buffer0; cl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = buffer1; cl->nextFreeByte += 4;
}
//input: 2 cls (cl, handles cl)
uint32_t buffer1);
void clInsertShaderRecord(ControlList* cls,
uint32_t fragmentShaderIsSingleThreaded, //0/1
uint32_t pointSizeIncludedInShadedVertexData, //0/1
@@ -652,94 +138,13 @@ void clInsertShaderRecord(ControlList* cls,
uint32_t vertexAttributeArraySelectBits,
uint32_t vertexTotalAttributesSize,
uint32_t vertexUniformsAddress,
ControlListAddress vertexCodeAddress)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
//TODO is this correct?
*cls->nextFreeByte =
moveBits(fragmentShaderIsSingleThreaded, 1, 0) |
moveBits(pointSizeIncludedInShadedVertexData, 1, 1) |
moveBits(enableClipping, 1, 2); cls->nextFreeByte++;
*cls->nextFreeByte = 0; cls->nextFreeByte++;
*(uint16_t*)cls->nextFreeByte = moveBits(fragmentNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2;
*cls->nextFreeByte = fragmentNumberOfVaryings; cls->nextFreeByte++;
clEmitShaderRelocation(cls, &fragmentCodeAddress);
*(uint32_t*)cls->nextFreeByte = fragmentCodeAddress.offset; cls->nextFreeByte += 4;
*(uint32_t*)cls->nextFreeByte = fragmentUniformsAddress; cls->nextFreeByte += 4;
*(uint16_t*)cls->nextFreeByte = moveBits(vertexNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2;
*cls->nextFreeByte = vertexAttributeArraySelectBits; cls->nextFreeByte++;
*cls->nextFreeByte = vertexTotalAttributesSize; cls->nextFreeByte++;
clEmitShaderRelocation(cls, &vertexCodeAddress);
*(uint32_t*)cls->nextFreeByte = moveBits(vertexCodeAddress.offset, 32, 0) | moveBits(vertexUniformsAddress, 32, 0); cls->nextFreeByte += 4; //???
cls->nextFreeByte += 4;
//skip coordinate shader stuff
cls->nextFreeByte += 16;
}
//input: 2 cls (cl, handles cl)
ControlListAddress vertexCodeAddress);
void clInsertAttributeRecord(ControlList* cls,
ControlListAddress address,
uint32_t sizeBytes,
uint32_t stride,
uint32_t vertexVPMOffset)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
uint32_t sizeBytesMinusOne = sizeBytes - 1;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
*cls->nextFreeByte = sizeBytesMinusOne; cls->nextFreeByte++;
*cls->nextFreeByte = stride; cls->nextFreeByte++;
*cls->nextFreeByte = vertexVPMOffset; cls->nextFreeByte++;
cls->nextFreeByte++; //skip coordinate shader stuff
}
uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handle)
{
uint32_t c = 0;
uint32_t numHandles = clSize(handlesCl) / 4;
for(; c < numHandles; ++c)
{
if(((uint32_t*)handlesCl->buffer)[c] == handle)
{
//found
return c;
}
}
//write handle to handles cl
*(uint32_t*)handlesCl->nextFreeByte = handle;
handlesCl->nextFreeByte += 4;
return c;
}
//input: 2 cls (cl + handles cl)
static inline void clEmitShaderRelocation(ControlList* cls, const ControlListAddress* address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
assert(address);
assert(address->handle);
//search for handle in handles cl
//if found insert handle index
ControlList* cl = cls;
ControlList* handlesCl = cls + 1;
//store offset within handles in cl
*(uint32_t*)cl->nextFreeByte = clGetHandleIndex(handlesCl, address->handle);
cl->nextFreeByte += 4;
}
uint32_t vertexVPMOffset);
uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handle);
#if defined (__cplusplus)
}

48 driver/LinearAllocator.c Normal file

@@ -0,0 +1,48 @@
#include "LinearAllocator.h"
#include "CustomAssert.h"
#include <stdint.h>
LinearAllocator createLinearAllocator(char* b, unsigned s)
{
assert(b);
assert(s > 0);
LinearAllocator la =
{
.buf = b,
.offset = 0,
.size = s
};
return la;
}
void destroyLinearAllocator(LinearAllocator* la)
{
la->buf = 0;
la->offset = 0;
la->size = 0;
}
void* linearAllocte(LinearAllocator* la, unsigned s)
{
assert(la->buf);
assert(la->size > 0);
if(la->offset + s >= la->size)
{
return 0; //no space left
}
char* p = la->buf + la->offset + s;
la->offset += s;
return p;
}
void linearFree(LinearAllocator* la, void* p)
{
//assert(0); //this shouldn't really happen, just destroy/reset the whole allocator
}
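A short, hedged sketch of the intended call pattern; the arena size is an example value, and note that linearFree is deliberately a no-op, the whole arena being reset by destroying it and freeing the backing store:

#include "LinearAllocator.h"
#include <stdlib.h>

void linearExample(void)
{
	char* backing = malloc( 1024 );
	LinearAllocator la = createLinearAllocator( backing, 1024 );
	void* a = linearAllocte( &la, 64 );  //the function name is spelled this way in the driver
	void* b = linearAllocte( &la, 128 ); //returns 0 once the arena is exhausted
	(void)a; (void)b;
	destroyLinearAllocator( &la );
	free( backing ); //backing memory is owned by the caller
}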

driver/LinearAllocator.h

@@ -15,48 +15,10 @@ typedef struct LinearAllocator
unsigned size;
} LinearAllocator;
LinearAllocator createLinearAllocator(char* b, unsigned s)
{
assert(b);
assert(s > 0);
LinearAllocator la =
{
.buf = b,
.offset = 0,
.size = s
};
return la;
}
void destroyLinearAllocator(LinearAllocator* la)
{
la->buf = 0;
la->offset = 0;
la->size = 0;
}
void* linearAllocte(LinearAllocator* la, unsigned s)
{
assert(la->buf);
assert(la->size > 0);
if(la->offset + s >= la->size)
{
return 0; //no space left
}
char* p = la->buf + la->offset + s;
la->offset += s;
return p;
}
void linearFree(LinearAllocator* la, void* p)
{
//assert(0); //this shouldn't really happen, just destroy/reset the whole allocator
}
LinearAllocator createLinearAllocator(char* b, unsigned s);
void destroyLinearAllocator(LinearAllocator* la);
void* linearAllocte(LinearAllocator* la, unsigned s);
void linearFree(LinearAllocator* la, void* p);
#if defined (__cplusplus)
}

72 driver/PoolAllocator.c Normal file

@@ -0,0 +1,72 @@
#include "PoolAllocator.h"
#include "CustomAssert.h"
#include <stdint.h>
PoolAllocator createPoolAllocator(char* b, unsigned bs, unsigned s)
{
assert(b); //only allocated memory
assert(bs >= sizeof(void*)); //we need to be able to store
assert(s%bs==0); //we want a size that is the exact multiple of block size
assert(s > bs); //at least 1 element
PoolAllocator pa =
{
.buf = b,
.nextFreeBlock = (uint32_t*)b,
.blockSize = bs,
.size = s
};
//initialize linked list of free pointers
uint32_t* ptr = pa.nextFreeBlock;
for(unsigned c = 0; c < s/bs - 1; ++c)
{
*ptr = (uint32_t)ptr + bs;
ptr += bs;
}
*ptr = 0; //last element
return pa;
}
void destroyPoolAllocator(PoolAllocator* pa)
{
//actual memory freeing is done by caller
pa->buf = 0;
pa->nextFreeBlock = 0;
pa->blockSize = 0;
pa->size = 0;
}
void* poolAllocate(PoolAllocator* pa)
{
assert(pa->buf);
if(!pa->nextFreeBlock)
{
return 0; //no free blocks
}
//next free block will be allocated
void* ret = pa->nextFreeBlock;
//set next free block to the one the current next points to
pa->nextFreeBlock = (uint32_t*)*pa->nextFreeBlock;
return ret;
}
void poolFree(PoolAllocator* pa, void* p)
{
assert(pa->buf);
assert(p);
//set block to be freed to point to the current next free block
*(uint32_t*)p = (uint32_t)pa->nextFreeBlock;
//set next free block to the freshly freed block
pa->nextFreeBlock = p;
}
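And a matching sketch for the fixed-size pool; block size and block count are again example values:

#include "PoolAllocator.h"
#include <stdlib.h>

void poolExample(void)
{
	const unsigned blockSize = 32, numBlocks = 128;
	char* backing = malloc( blockSize * numBlocks );
	PoolAllocator pa = createPoolAllocator( backing, blockSize, blockSize * numBlocks );
	void* block = poolAllocate( &pa ); //one fixed-size block, or 0 if the pool is exhausted
	if( block )
	{
		poolFree( &pa, block );
	}
	destroyPoolAllocator( &pa );
	free( backing ); //backing memory is owned by the caller
}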

driver/PoolAllocator.h

@@ -16,72 +16,10 @@ typedef struct PoolAllocator
unsigned size; //size is exact multiple of block size
} PoolAllocator;
PoolAllocator createPoolAllocator(char* b, unsigned bs, unsigned s)
{
assert(b); //only allocated memory
assert(bs >= sizeof(void*)); //we need to be able to store
assert(s%bs==0); //we want a size that is the exact multiple of block size
assert(s > bs); //at least 1 element
PoolAllocator pa =
{
.buf = b,
.nextFreeBlock = (uint32_t*)b,
.blockSize = bs,
.size = s
};
//initialize linked list of free pointers
uint32_t* ptr = pa.nextFreeBlock;
for(unsigned c = 0; c < s/bs - 1; ++c)
{
*ptr = (uint32_t)ptr + bs;
ptr += bs;
}
*ptr = 0; //last element
return pa;
}
void destroyPoolAllocator(PoolAllocator* pa)
{
//actual memory freeing is done by caller
pa->buf = 0;
pa->nextFreeBlock = 0;
pa->blockSize = 0;
pa->size = 0;
}
void* poolAllocate(PoolAllocator* pa)
{
assert(pa->buf);
if(!pa->nextFreeBlock)
{
return 0; //no free blocks
}
//next free block will be allocated
void* ret = pa->nextFreeBlock;
//set next free block to the one the current next points to
pa->nextFreeBlock = (uint32_t*)*pa->nextFreeBlock;
return ret;
}
void poolFree(PoolAllocator* pa, void* p)
{
assert(pa->buf);
assert(p);
//set block to be freed to point to the current next free block
*(uint32_t*)p = (uint32_t)pa->nextFreeBlock;
//set next free block to the freshly freed block
pa->nextFreeBlock = p;
}
PoolAllocator createPoolAllocator(char* b, unsigned bs, unsigned s);
void destroyPoolAllocator(PoolAllocator* pa);
void* poolAllocate(PoolAllocator* pa);
void poolFree(PoolAllocator* pa, void* p);
#if defined (__cplusplus)
}

449
driver/command.c Normal file
View File

@ -0,0 +1,449 @@
#include "common.h"
#include "kernel/vc4_packet.h"
#include "../brcm/cle/v3d_decoder.h"
#include "../brcm/clif/clif_dump.h"
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#commandbuffers-pools
* Command pools are opaque objects that command buffer memory is allocated from, and which allow the implementation to amortize the
* cost of resource creation across multiple command buffers. Command pools are externally synchronized, meaning that a command pool must
* not be used concurrently in multiple threads. That includes use via recording commands on any command buffers allocated from the pool,
* as well as operations that allocate, free, and reset command buffers or the pool itself.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool(
VkDevice device,
const VkCommandPoolCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkCommandPool* pCommandPool)
{
assert(device);
assert(pCreateInfo);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//VK_COMMAND_POOL_CREATE_TRANSIENT_BIT
//specifies that command buffers allocated from the pool will be short-lived, meaning that they will be reset or freed in a relatively short timeframe.
//This flag may be used by the implementation to control memory allocation behavior within the pool.
//--> definitely use pool allocator
//VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT
//allows any command buffer allocated from a pool to be individually reset to the initial state; either by calling vkResetCommandBuffer, or via the implicit reset when calling vkBeginCommandBuffer.
//If this flag is not set on a pool, then vkResetCommandBuffer must not be called for any command buffer allocated from that pool.
//TODO pool family ignored for now
_commandPool* cp = malloc(sizeof(_commandPool));
if(!cp)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
cp->queueFamilyIndex = pCreateInfo->queueFamilyIndex;
//initial number of command buffers to hold
int numCommandBufs = 100;
int controlListSize = ARM_PAGE_SIZE * 100;
//if(pCreateInfo->flags & VK_COMMAND_POOL_CREATE_TRANSIENT_BIT)
{
//use pool allocator
void* pamem = malloc(numCommandBufs * sizeof(_commandBuffer));
if(!pamem)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
cp->pa = createPoolAllocator(pamem, sizeof(_commandBuffer), numCommandBufs * sizeof(_commandBuffer));
void* cpamem = malloc(controlListSize);
if(!cpamem)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
cp->cpa = createConsecutivePoolAllocator(cpamem, ARM_PAGE_SIZE, controlListSize);
}
*pCommandPool = (VkCommandPool)cp;
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#commandbuffer-allocation
* vkAllocateCommandBuffers can be used to create multiple command buffers. If the creation of any of those command buffers fails,
* the implementation must destroy all successfully created command buffer objects from this command, set all entries of the pCommandBuffers array to NULL and return the error.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers(
VkDevice device,
const VkCommandBufferAllocateInfo* pAllocateInfo,
VkCommandBuffer* pCommandBuffers)
{
assert(device);
assert(pAllocateInfo);
assert(pCommandBuffers);
VkResult res = VK_SUCCESS;
_commandPool* cp = (_commandPool*)pAllocateInfo->commandPool;
//if(cp->usePoolAllocator)
{
for(int c = 0; c < pAllocateInfo->commandBufferCount; ++c)
{
pCommandBuffers[c] = poolAllocate(&cp->pa);
if(!pCommandBuffers[c])
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
pCommandBuffers[c]->shaderRecCount = 0;
pCommandBuffers[c]->usageFlags = 0;
pCommandBuffers[c]->state = CMDBUF_STATE_INITIAL;
pCommandBuffers[c]->cp = cp;
clInit(&pCommandBuffers[c]->binCl, consecutivePoolAllocate(&cp->cpa, 1));
clInit(&pCommandBuffers[c]->handlesCl, consecutivePoolAllocate(&cp->cpa, 1));
clInit(&pCommandBuffers[c]->shaderRecCl, consecutivePoolAllocate(&cp->cpa, 1));
clInit(&pCommandBuffers[c]->uniformsCl, consecutivePoolAllocate(&cp->cpa, 1));
if(!pCommandBuffers[c]->binCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->handlesCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->shaderRecCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->uniformsCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
}
}
if(res != VK_SUCCESS)
{
//if(cp->usePoolAllocator)
{
for(int c = 0; c < pAllocateInfo->commandBufferCount; ++c)
{
if(!pCommandBuffers[c]) break; //allocation failed at this index, nothing beyond it was created
//free the control list storage (not the ControlList structs) with matching block counts
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->binCl.buffer, pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->handlesCl.buffer, pCommandBuffers[c]->handlesCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->shaderRecCl.buffer, pCommandBuffers[c]->shaderRecCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->uniformsCl.buffer, pCommandBuffers[c]->uniformsCl.numBlocks);
poolFree(&cp->pa, pCommandBuffers[c]);
pCommandBuffers[c] = 0;
}
}
}
return res;
}
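//Illustrative application-side sketch only, not part of the driver: the call pattern the
//two entry points above are written against, assuming a VkDevice and queue family index
//obtained elsewhere.
static void commandBufferAllocationExample(VkDevice device, uint32_t queueFamilyIndex)
{
VkCommandPoolCreateInfo poolInfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = queueFamilyIndex,
};
VkCommandPool pool;
vkCreateCommandPool(device, &poolInfo, 0, &pool); //pAllocator must currently be 0
VkCommandBufferAllocateInfo allocInfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.commandPool = pool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
};
VkCommandBuffer cmdBuf;
vkAllocateCommandBuffers(device, &allocInfo, &cmdBuf);
}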
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkBeginCommandBuffer
*/
VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer(
VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo* pBeginInfo)
{
assert(commandBuffer);
assert(pBeginInfo);
//TODO
//VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
//specifies that each recording of the command buffer will only be submitted once, and the command buffer will be reset and recorded again between each submission.
//VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT
//specifies that a secondary command buffer is considered to be entirely inside a render pass. If this is a primary command buffer, then this bit is ignored
//VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT
//specifies that a command buffer can be resubmitted to a queue while it is in the pending state, and recorded into multiple primary command buffers
//When a command buffer begins recording, all state in that command buffer is undefined
struct drm_vc4_submit_cl submitCl =
{
.color_read.hindex = ~0,
.zs_read.hindex = ~0,
.color_write.hindex = ~0,
.msaa_color_write.hindex = ~0,
.zs_write.hindex = ~0,
.msaa_zs_write.hindex = ~0,
};
commandBuffer->usageFlags = pBeginInfo->flags;
commandBuffer->shaderRecCount = 0;
commandBuffer->state = CMDBUF_STATE_RECORDING;
commandBuffer->submitCl = submitCl;
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEndCommandBuffer
* If there was an error during recording, the application will be notified by an unsuccessful return code returned by vkEndCommandBuffer.
* If the application wishes to further use the command buffer, the command buffer must be reset. The command buffer must have been in the recording state,
* and is moved to the executable state.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkEndCommandBuffer(
VkCommandBuffer commandBuffer)
{
assert(commandBuffer);
//Increment the semaphore indicating that binning is done and
//unblocking the render thread. Note that this doesn't act
//until the FLUSH completes.
//The FLUSH caps all of our bin lists with a
//VC4_PACKET_RETURN.
clFit(commandBuffer, &commandBuffer->binCl, V3D21_INCREMENT_SEMAPHORE_length);
clInsertIncrementSemaphore(&commandBuffer->binCl);
clFit(commandBuffer, &commandBuffer->binCl, V3D21_FLUSH_length);
clInsertFlush(&commandBuffer->binCl);
commandBuffer->state = CMDBUF_STATE_EXECUTABLE;
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkQueueSubmit
* vkQueueSubmit is a queue submission command, with each batch defined by an element of pSubmits as an instance of the VkSubmitInfo structure.
* Batches begin execution in the order they appear in pSubmits, but may complete out of order.
* Fence and semaphore operations submitted with vkQueueSubmit have additional ordering constraints compared to other submission commands,
* with dependencies involving previous and subsequent queue operations. Information about these additional constraints can be found in the semaphore and
* fence sections of the synchronization chapter.
* Details on the interaction of pWaitDstStageMask with synchronization are described in the semaphore wait operation section of the synchronization chapter.
* The order that batches appear in pSubmits is used to determine submission order, and thus all the implicit ordering guarantees that respect it.
* Other than these implicit ordering guarantees and any explicit synchronization primitives, these batches may overlap or otherwise execute out of order.
* If any command buffer submitted to this queue is in the executable state, it is moved to the pending state. Once execution of all submissions of a command buffer complete,
* it moves from the pending state, back to the executable state. If a command buffer was recorded with the VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT flag,
* it instead moves back to the invalid state.
* If vkQueueSubmit fails, it may return VK_ERROR_OUT_OF_HOST_MEMORY or VK_ERROR_OUT_OF_DEVICE_MEMORY.
* If it does, the implementation must ensure that the state and contents of any resources or synchronization primitives referenced by the submitted command buffers and any semaphores
* referenced by pSubmits is unaffected by the call or its failure. If vkQueueSubmit fails in such a way that the implementation is unable to make that guarantee,
* the implementation must return VK_ERROR_DEVICE_LOST. See Lost Device.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
VkQueue queue,
uint32_t submitCount,
const VkSubmitInfo* pSubmits,
VkFence fence)
{
assert(queue);
for(int c = 0; c < pSubmits->waitSemaphoreCount; ++c)
{
sem_wait((sem_t*)pSubmits->pWaitSemaphores[c]);
}
//TODO: deal with pSubmits->pWaitDstStageMask
//TODO wait for fence??
for(int c = 0; c < pSubmits->commandBufferCount; ++c)
{
if(pSubmits->pCommandBuffers[c]->state == CMDBUF_STATE_EXECUTABLE)
{
pSubmits->pCommandBuffers[c]->state = CMDBUF_STATE_PENDING;
}
}
for(int c = 0; c < pSubmits->commandBufferCount; ++c)
{
VkCommandBuffer cmdbuf = pSubmits->pCommandBuffers[c];
cmdbuf->submitCl.bo_handles = cmdbuf->handlesCl.buffer;
cmdbuf->submitCl.bo_handle_count = clSize(&cmdbuf->handlesCl) / 4;
cmdbuf->submitCl.bin_cl = cmdbuf->binCl.buffer;
cmdbuf->submitCl.bin_cl_size = clSize(&cmdbuf->binCl);
cmdbuf->submitCl.shader_rec = cmdbuf->shaderRecCl.buffer;
cmdbuf->submitCl.shader_rec_size = clSize(&cmdbuf->shaderRecCl);
cmdbuf->submitCl.shader_rec_count = cmdbuf->shaderRecCount;
cmdbuf->submitCl.uniforms = cmdbuf->uniformsCl.buffer;
cmdbuf->submitCl.uniforms_size = clSize(&cmdbuf->uniformsCl);
printf("BCL:\n");
clDump(cmdbuf->submitCl.bin_cl, cmdbuf->submitCl.bin_cl_size);
printf("BO handles: ");
for(int d = 0; d < cmdbuf->submitCl.bo_handle_count; ++d)
{
printf("%u ", *((uint32_t*)(cmdbuf->submitCl.bo_handles)+d));
}
printf("\nwidth height: %u, %u\n", cmdbuf->submitCl.width, cmdbuf->submitCl.height);
printf("tile min/max: %u,%u %u,%u\n", cmdbuf->submitCl.min_x_tile, cmdbuf->submitCl.min_y_tile, cmdbuf->submitCl.max_x_tile, cmdbuf->submitCl.max_y_tile);
printf("color read surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.color_read.hindex, cmdbuf->submitCl.color_read.offset, cmdbuf->submitCl.color_read.bits, cmdbuf->submitCl.color_read.flags);
printf("color write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.color_write.hindex, cmdbuf->submitCl.color_write.offset, cmdbuf->submitCl.color_write.bits, cmdbuf->submitCl.color_write.flags);
printf("zs read surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.zs_read.hindex, cmdbuf->submitCl.zs_read.offset, cmdbuf->submitCl.zs_read.bits, cmdbuf->submitCl.zs_read.flags);
printf("zs write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.zs_write.hindex, cmdbuf->submitCl.zs_write.offset, cmdbuf->submitCl.zs_write.bits, cmdbuf->submitCl.zs_write.flags);
printf("msaa color write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.msaa_color_write.hindex, cmdbuf->submitCl.msaa_color_write.offset, cmdbuf->submitCl.msaa_color_write.bits, cmdbuf->submitCl.msaa_color_write.flags);
printf("msaa zs write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.msaa_zs_write.hindex, cmdbuf->submitCl.msaa_zs_write.offset, cmdbuf->submitCl.msaa_zs_write.bits, cmdbuf->submitCl.msaa_zs_write.flags);
printf("clear color packed rgba %u %u\n", cmdbuf->submitCl.clear_color[0], cmdbuf->submitCl.clear_color[1]);
printf("clear z %u\n", cmdbuf->submitCl.clear_z);
printf("clear s %u\n", cmdbuf->submitCl.clear_s);
printf("flags %u\n", cmdbuf->submitCl.flags);
//submit ioctl
static uint64_t lastFinishedSeqno = 0;
vc4_cl_submit(controlFd, &cmdbuf->submitCl, &queue->lastEmitSeqno, &lastFinishedSeqno);
}
for(int c = 0; c < pSubmits->commandBufferCount; ++c)
{
if(pSubmits->pCommandBuffers[c]->state == CMDBUF_STATE_PENDING)
{
if(pSubmits->pCommandBuffers[c]->usageFlags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)
{
pSubmits->pCommandBuffers[c]->state = CMDBUF_STATE_INVALID;
}
else
{
pSubmits->pCommandBuffers[c]->state = CMDBUF_STATE_EXECUTABLE;
}
}
}
for(int c = 0; c < pSubmits->signalSemaphoreCount; ++c)
{
sem_post((sem_t*)pSubmits->pSignalSemaphores[c]);
}
return VK_SUCCESS;
}
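//Illustrative application-side sketch only, not part of the driver: the begin/end/submit
//cycle driving the state machine above (INITIAL -> RECORDING -> EXECUTABLE -> PENDING).
static void submitExample(VkQueue queue, VkCommandBuffer cmdBuf)
{
VkCommandBufferBeginInfo beginInfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
vkBeginCommandBuffer(cmdBuf, &beginInfo); //moves the buffer to RECORDING
//record commands here
vkEndCommandBuffer(cmdBuf); //caps the bin CL and moves the buffer to EXECUTABLE
VkSubmitInfo submitInfo = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.commandBufferCount = 1,
.pCommandBuffers = &cmdBuf,
};
vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE); //kicks the CLs to the kernel;
//with ONE_TIME_SUBMIT the buffer ends up INVALID instead of EXECUTABLE afterwards
}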
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkFreeCommandBuffers
* Any primary command buffer that is in the recording or executable state and has any element of pCommandBuffers recorded into it, becomes invalid.
*/
VKAPI_ATTR void VKAPI_CALL vkFreeCommandBuffers(
VkDevice device,
VkCommandPool commandPool,
uint32_t commandBufferCount,
const VkCommandBuffer* pCommandBuffers)
{
assert(device);
assert(commandPool);
assert(pCommandBuffers);
_commandPool* cp = (_commandPool*)commandPool;
for(int c = 0; c < commandBufferCount; ++c)
{
//if(cp->usePoolAllocator)
{
//free the control list storage (not the ControlList structs) with matching block counts
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->binCl.buffer, pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->handlesCl.buffer, pCommandBuffers[c]->handlesCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->shaderRecCl.buffer, pCommandBuffers[c]->shaderRecCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->uniformsCl.buffer, pCommandBuffers[c]->uniformsCl.numBlocks);
poolFree(&cp->pa, pCommandBuffers[c]);
}
}
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyCommandPool
* When a pool is destroyed, all command buffers allocated from the pool are freed.
* Any primary command buffer allocated from another VkCommandPool that is in the recording or executable state and has a secondary command buffer
* allocated from commandPool recorded into it, becomes invalid.
*/
VKAPI_ATTR void VKAPI_CALL vkDestroyCommandPool(
VkDevice device,
VkCommandPool commandPool,
const VkAllocationCallbacks* pAllocator)
{
assert(device);
assert(commandPool);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
_commandPool* cp = (_commandPool*)commandPool;
//if(cp->usePoolAllocator)
{
free(cp->pa.buf);
free(cp->cpa.buf);
destroyPoolAllocator(&cp->pa);
destroyConsecutivePoolAllocator(&cp->cpa);
}
free(cp);
}
void clFit(VkCommandBuffer cb, ControlList* cl, uint32_t commandSize)
{
if(!clHasEnoughSpace(cl, commandSize))
{
uint32_t currSize = clSize(cl);
cl->buffer = consecutivePoolReAllocate(&cb->cp->cpa, cl->buffer, cl->numBlocks); assert(cl->buffer);
cl->nextFreeByte = cl->buffer + currSize;
}
}
void clDump(void* cl, uint32_t size)
{
struct v3d_device_info devinfo = {
/* While the driver supports V3D 2.1 and 2.6, we haven't split
* off a 2.6 XML yet (there are a couple of fields different
* in render target formatting)
*/
.ver = 21,
};
struct v3d_spec* spec = v3d_spec_load(&devinfo);
struct clif_dump *clif = clif_dump_init(&devinfo, stderr, true);
uint32_t offset = 0, hw_offset = 0;
uint8_t *p = cl;
while (offset < size) {
struct v3d_group *inst = v3d_spec_find_instruction(spec, p);
uint8_t header = *p;
uint32_t length;
if (inst == NULL) {
printf("0x%08x 0x%08x: Unknown packet 0x%02x (%d)!\n",
offset, hw_offset, header, header);
return;
}
length = v3d_group_get_length(inst);
printf("0x%08x 0x%08x: 0x%02x %s\n",
offset, hw_offset, header, v3d_group_get_name(inst));
v3d_print_group(clif, inst, offset, p);
switch (header) {
case VC4_PACKET_HALT:
case VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF:
return;
default:
break;
}
offset += length;
if (header != VC4_PACKET_GEM_HANDLES)
hw_offset += length;
p += length;
}
clif_dump_destroy(clif);
}

328
driver/common.c Normal file
View File

@ -0,0 +1,328 @@
#include "common.h"
#include "kernel/vc4_packet.h"
void createImageBO(_image* i)
{
assert(i);
assert(i->format);
assert(i->width);
assert(i->height);
uint32_t bpp = getFormatBpp(i->format);
uint32_t pixelSizeBytes = bpp / 8;
uint32_t nonPaddedSize = i->width * i->height * pixelSizeBytes;
i->paddedWidth = i->width;
i->paddedHeight = i->height;
//need to pad to T format, as HW automatically chooses that
if(nonPaddedSize > 4096)
{
getPaddedTextureDimensionsT(i->width, i->height, bpp, &i->paddedWidth, &i->paddedHeight);
}
i->size = i->paddedWidth * i->paddedHeight * pixelSizeBytes;
i->stride = i->paddedWidth * pixelSizeBytes;
i->handle = vc4_bo_alloc(controlFd, i->size, "swapchain image"); assert(i->handle);
//set tiling to T if size > 4KB
if(nonPaddedSize > 4096)
{
int ret = vc4_bo_set_tiling(controlFd, i->handle, DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED); assert(ret);
i->tiling = VC4_TILING_FORMAT_T;
}
else
{
int ret = vc4_bo_set_tiling(controlFd, i->handle, DRM_FORMAT_MOD_LINEAR); assert(ret);
i->tiling = VC4_TILING_FORMAT_LT;
}
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdClearColorImage
* Color and depth/stencil images can be cleared outside a render pass instance using vkCmdClearColorImage or vkCmdClearDepthStencilImage, respectively.
* These commands are only allowed outside of a render pass instance.
*/
VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage(
VkCommandBuffer commandBuffer,
VkImage image,
VkImageLayout imageLayout,
const VkClearColorValue* pColor,
uint32_t rangeCount,
const VkImageSubresourceRange* pRanges)
{
assert(commandBuffer);
assert(image);
assert(pColor);
//TODO this should only flag an image for clearing. This can only be called outside a renderpass
//actual clearing would only happen:
// -if image is rendered to (insert clear before first draw call)
// -if the image is bound for sampling (submit a CL with a clear)
// -if a command buffer is submitted without any rendering (insert clear)
// -etc.
//we shouldn't clear an image if no one uses it
//TODO ranges support
assert(imageLayout == VK_IMAGE_LAYOUT_GENERAL ||
imageLayout == VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR ||
imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
assert(commandBuffer->state == CMDBUF_STATE_RECORDING);
assert(_queueFamilyProperties[commandBuffer->cp->queueFamilyIndex].queueFlags & VK_QUEUE_GRAPHICS_BIT || _queueFamilyProperties[commandBuffer->cp->queueFamilyIndex].queueFlags & VK_QUEUE_COMPUTE_BIT);
_image* i = image;
assert(i->usageBits & VK_IMAGE_USAGE_TRANSFER_DST_BIT);
//TODO externally sync cmdbuf, cmdpool
i->needToClear = 1;
i->clearColor[0] = i->clearColor[1] = packVec4IntoABGR8(pColor->float32);
}
int findInstanceExtension(char* name)
{
for(int c = 0; c < numInstanceExtensions; ++c)
{
if(strcmp(instanceExtensions[c].extensionName, name) == 0)
{
return c;
}
}
return -1;
}
int findDeviceExtension(char* name)
{
for(int c = 0; c < numDeviceExtensions; ++c)
{
if(strcmp(deviceExtensions[c].extensionName, name) == 0)
{
return c;
}
}
return -1;
}
//Textures in T format:
//formed out of 4KB tiles, which have 1KB subtiles (see page 105 in VC4 arch guide)
//1KB subtiles have 512b microtiles.
//Width/height of the 512b microtiles is the following:
// 64bpp: 2x4
// 32bpp: 4x4
// 16bpp: 8x4
// 8bpp: 8x8
// 4bpp: 16x8
// 1bpp: 32x16
//Therefore width/height of 1KB subtiles is the following:
// 64bpp: 8x16
// 32bpp: 16x16
// 16bpp: 32x16
// 8bpp: 32x32
// 4bpp: 64x32
// 1bpp: 128x64
//Finally width/height of the 4KB tiles:
// 64bpp: 16x32
// 32bpp: 32x32
// 16bpp: 64x32
// 8bpp: 64x64
// 4bpp: 128x64
// 1bpp: 256x128
void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp, uint32_t* paddedWidth, uint32_t* paddedHeight)
{
assert(paddedWidth);
assert(paddedHeight);
uint32_t tileW = 0;
uint32_t tileH = 0;
switch(bpp)
{
case 64:
{
tileW = 16;
tileH = 32;
break;
}
case 32:
{
tileW = 32;
tileH = 32;
break;
}
case 16:
{
tileW = 64;
tileH = 32;
break;
}
case 8:
{
tileW = 64;
tileH = 64;
break;
}
case 4:
{
tileW = 128;
tileH = 64;
break;
}
case 1:
{
tileW = 256;
tileH = 128;
break;
}
default:
{
assert(0); //unsupported
}
}
*paddedWidth = ((tileW - (width % tileW)) % tileW) + width;
*paddedHeight = ((tileH - (height % tileH)) % tileH) + height;
}
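//Worked example (illustrative only): for a 1920x1080 32bpp image the 4KB tiles are
//32x32 pixels, so the width is already aligned and the height pads up to 1088.
static void paddingExample(void)
{
uint32_t pw = 0, ph = 0;
getPaddedTextureDimensionsT(1920, 1080, 32, &pw, &ph);
//pw == 1920, ph == 1088 here
}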
uint32_t getFormatBpp(VkFormat f)
{
switch(f)
{
case VK_FORMAT_R16G16B16A16_SFLOAT:
return 64;
case VK_FORMAT_R8G8B8_UNORM: //padded to 32
case VK_FORMAT_R8G8B8A8_UNORM:
return 32;
case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
case VK_FORMAT_R8G8_UNORM:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16_SINT:
return 16;
case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8_SINT:
return 8;
default:
assert(0);
return 0;
}
}
uint32_t packVec4IntoABGR8(const float rgba[4])
{
uint8_t r, g, b, a;
r = rgba[0] * 255.0;
g = rgba[1] * 255.0;
b = rgba[2] * 255.0;
a = rgba[3] * 255.0;
uint32_t res = 0 |
(a << 24) |
(b << 16) |
(g << 8) |
(r << 0);
return res;
}
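//Worked example (illustrative only): opaque red {1.0, 0.0, 0.0, 1.0} packs to 0xFF0000FF,
//i.e. A=0xFF in bits 24-31, B=0x00, G=0x00 and R=0xFF in bits 0-7.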
/*static inline void util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc)
{
ubyte r = 0;
ubyte g = 0;
ubyte b = 0;
ubyte a = 0;
if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) <= 8) {
r = float_to_ubyte(rgba[0]);
g = float_to_ubyte(rgba[1]);
b = float_to_ubyte(rgba[2]);
a = float_to_ubyte(rgba[3]);
}
switch (format) {
case PIPE_FORMAT_ABGR8888_UNORM:
{
uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | a;
}
return;
case PIPE_FORMAT_XBGR8888_UNORM:
{
uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | 0xff;
}
return;
case PIPE_FORMAT_BGRA8888_UNORM:
{
uc->ui[0] = (a << 24) | (r << 16) | (g << 8) | b;
}
return;
case PIPE_FORMAT_BGRX8888_UNORM:
{
uc->ui[0] = (0xffu << 24) | (r << 16) | (g << 8) | b;
}
return;
case PIPE_FORMAT_ARGB8888_UNORM:
{
uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | a;
}
return;
case PIPE_FORMAT_XRGB8888_UNORM:
{
uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | 0xff;
}
return;
case PIPE_FORMAT_B5G6R5_UNORM:
{
uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
}
return;
case PIPE_FORMAT_B5G5R5X1_UNORM:
{
uc->us = ((0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
}
return;
case PIPE_FORMAT_B5G5R5A1_UNORM:
{
uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
}
return;
case PIPE_FORMAT_B4G4R4A4_UNORM:
{
uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
}
return;
case PIPE_FORMAT_A8_UNORM:
{
uc->ub = a;
}
return;
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
{
uc->ub = r;
}
return;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
{
uc->f[0] = rgba[0];
uc->f[1] = rgba[1];
uc->f[2] = rgba[2];
uc->f[3] = rgba[3];
}
return;
case PIPE_FORMAT_R32G32B32_FLOAT:
{
uc->f[0] = rgba[0];
uc->f[1] = rgba[1];
uc->f[2] = rgba[2];
}
return;
default:
util_format_write_4f(format, rgba, 0, uc, 0, 0, 0, 1, 1);
}
}*/

120
driver/common.h Normal file
View File

@ -0,0 +1,120 @@
#pragma once
#include <drm/drm.h>
#include <drm/drm_fourcc.h>
#include <drm/vc4_drm.h>
#include <vulkan/vulkan.h>
#include "vkExt.h"
#include "AlignedAllocator.h"
#include "PoolAllocator.h"
#include "ConsecutivePoolAllocator.h"
#include "LinearAllocator.h"
#include <stdio.h>
#include "CustomAssert.h"
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdint.h>
#include <pthread.h>
#include <semaphore.h>
#include "modeset.h"
#include "kernelInterface.h"
#include "ControlListUtil.h"
#ifndef min
#define min(a, b) (a < b ? a : b)
#endif
#ifndef max
#define max(a, b) (a > b ? a : b)
#endif
#include "vkCaps.h"
typedef struct VkPhysicalDevice_T
{
//hardware id?
int dummy;
} _physicalDevice;
typedef struct VkQueue_T
{
uint64_t lastEmitSeqno;
} _queue;
typedef struct VkCommandPool_T
{
PoolAllocator pa;
ConsecutivePoolAllocator cpa;
uint32_t queueFamilyIndex;
} _commandPool;
typedef enum commandBufferState
{
CMDBUF_STATE_INITIAL = 0,
CMDBUF_STATE_RECORDING,
CMDBUF_STATE_EXECUTABLE,
CMDBUF_STATE_PENDING,
CMDBUF_STATE_INVALID,
CMDBUF_STATE_LAST
} commandBufferState;
typedef struct VkCommandBuffer_T
{
//Recorded commands include commands to bind pipelines and descriptor sets to the command buffer, commands to modify dynamic state, commands to draw (for graphics rendering),
//commands to dispatch (for compute), commands to execute secondary command buffers (for primary command buffers only), commands to copy buffers and images, and other commands
struct drm_vc4_submit_cl submitCl;
ControlList binCl;
ControlList shaderRecCl;
uint32_t shaderRecCount;
ControlList uniformsCl;
ControlList handlesCl;
commandBufferState state;
VkCommandBufferUsageFlags usageFlags;
_commandPool* cp;
} _commandBuffer;
typedef struct VkInstance_T
{
//supposedly this should contain all the enabled layers?
int enabledExtensions[numInstanceExtensions];
int numEnabledExtensions;
_physicalDevice dev;
int chipVersion;
int hasTiling;
int hasControlFlow;
int hasEtc1;
int hasThreadedFs;
int hasMadvise;
} _instance;
typedef struct VkDevice_T
{
int enabledExtensions[numDeviceExtensions];
int numEnabledExtensions;
VkPhysicalDeviceFeatures enabledFeatures;
_physicalDevice* dev;
_queue* queues[numQueueFamilies];
int numQueues[numQueueFamilies];
} _device;
typedef struct VkSwapchain_T
{
_image* images;
uint32_t numImages;
uint32_t backbufferIdx;
VkSurfaceKHR surface;
} _swapchain;
void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp, uint32_t* paddedWidth, uint32_t* paddedHeight);
uint32_t getFormatBpp(VkFormat f);
uint32_t packVec4IntoABGR8(const float rgba[4]);
int findInstanceExtension(char* name);
int findDeviceExtension(char* name);
void createImageBO(_image* i);

314
driver/device.c Normal file
View File

@ -0,0 +1,314 @@
#include "common.h"
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#devsandqueues-physical-device-enumeration
* If pPhysicalDevices is NULL, then the number of physical devices available is returned in pPhysicalDeviceCount. Otherwise, pPhysicalDeviceCount must point to a
* variable set by the user to the number of elements in the pPhysicalDevices array, and on return the variable is overwritten with the number of handles actually
* written to pPhysicalDevices. If pPhysicalDeviceCount is less than the number of physical devices available, at most pPhysicalDeviceCount structures will be written.
* If pPhysicalDeviceCount is smaller than the number of physical devices available, VK_INCOMPLETE will be returned instead of VK_SUCCESS, to indicate that not all the
* available physical devices were returned.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices(
VkInstance instance,
uint32_t* pPhysicalDeviceCount,
VkPhysicalDevice* pPhysicalDevices)
{
assert(instance);
//TODO is there a way to check if there's a gpu (and it's the rPi)?
int gpuExists = access( "/dev/dri/card0", F_OK ) != -1;
int numGPUs = gpuExists;
assert(pPhysicalDeviceCount);
if(!pPhysicalDevices)
{
*pPhysicalDeviceCount = numGPUs;
return VK_SUCCESS;
}
int arraySize = *pPhysicalDeviceCount;
int elementsWritten = min(numGPUs, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pPhysicalDevices[c] = &instance->dev;
}
*pPhysicalDeviceCount = elementsWritten;
if(elementsWritten < numGPUs)
{
return VK_INCOMPLETE; //the provided array was too small to hold every device
}
else
{
return VK_SUCCESS;
}
}
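//Illustrative application-side sketch only, not part of the driver: the usual two-call
//enumeration pattern this entry point is written against.
static void enumerationExample(VkInstance instance)
{
uint32_t count = 0;
vkEnumeratePhysicalDevices(instance, &count, 0); //first call queries the count
VkPhysicalDevice device;
if(count > 0)
{
count = 1; //we only have room for a single handle
vkEnumeratePhysicalDevices(instance, &count, &device); //second call fetches the handle
}
}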
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceProperties
*/
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties* pProperties)
{
assert(physicalDevice);
assert(pProperties);
VkPhysicalDeviceSparseProperties sparseProps =
{
.residencyStandard2DBlockShape = 1,
.residencyStandard2DMultisampleBlockShape = 1,
.residencyStandard3DBlockShape = 1,
.residencyAlignedMipSize = 1,
.residencyNonResidentStrict = 1
};
pProperties->apiVersion = VK_MAKE_VERSION(1,1,0);
pProperties->driverVersion = 1; //we'll simply call this v1
pProperties->vendorID = 0x14E4; //Broadcom
pProperties->deviceID = 0; //TODO dunno?
pProperties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
strcpy(pProperties->deviceName, "VideoCore IV HW");
//pProperties->pipelineCacheUUID
pProperties->limits = _limits;
pProperties->sparseProperties = sparseProps;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceFeatures
*/
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFeatures(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures* pFeatures)
{
assert(physicalDevice);
assert(pFeatures);
*pFeatures = _features;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEnumerateDeviceExtensionProperties
*/
VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceExtensionProperties(
VkPhysicalDevice physicalDevice,
const char* pLayerName,
uint32_t* pPropertyCount,
VkExtensionProperties* pProperties)
{
assert(physicalDevice);
assert(!pLayerName); //layers ignored for now
assert(pPropertyCount);
if(!pProperties)
{
*pPropertyCount = numDeviceExtensions;
return VK_SUCCESS; //a pure count query is a complete, successful result
}
int arraySize = *pPropertyCount;
int elementsWritten = min(numDeviceExtensions, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pProperties[c] = deviceExtensions[c];
}
*pPropertyCount = elementsWritten;
return elementsWritten < numDeviceExtensions ? VK_INCOMPLETE : VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceQueueFamilyProperties
* If pQueueFamilyProperties is NULL, then the number of queue families available is returned in pQueueFamilyPropertyCount.
* Otherwise, pQueueFamilyPropertyCount must point to a variable set by the user to the number of elements in the pQueueFamilyProperties array,
* and on return the variable is overwritten with the number of structures actually written to pQueueFamilyProperties. If pQueueFamilyPropertyCount
* is less than the number of queue families available, at most pQueueFamilyPropertyCount structures will be written.
*/
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyProperties(
VkPhysicalDevice physicalDevice,
uint32_t* pQueueFamilyPropertyCount,
VkQueueFamilyProperties* pQueueFamilyProperties)
{
assert(physicalDevice);
assert(pQueueFamilyPropertyCount);
if(!pQueueFamilyProperties)
{
*pQueueFamilyPropertyCount = numQueueFamilies;
return;
}
int arraySize = *pQueueFamilyPropertyCount;
int elementsWritten = min(numQueueFamilies, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pQueueFamilyProperties[c] = _queueFamilyProperties[c];
}
*pQueueFamilyPropertyCount = elementsWritten;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceSupportKHR
* does this queue family support presentation to this surface?
*/
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR(
VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
VkSurfaceKHR surface,
VkBool32* pSupported)
{
assert(pSupported);
assert(surface);
assert(physicalDevice);
assert(queueFamilyIndex < numQueueFamilies);
//TODO if we plan to support headless rendering, there should be 2 families
//one using /dev/dri/card0 which has modesetting
//other using /dev/dri/renderD128 which does not support modesetting, this would say false here
*pSupported = VK_TRUE;
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateDevice
* vkCreateDevice verifies that extensions and features requested in the ppEnabledExtensionNames and pEnabledFeatures
* members of pCreateInfo, respectively, are supported by the implementation. If any requested extension is not supported,
* vkCreateDevice must return VK_ERROR_EXTENSION_NOT_PRESENT. If any requested feature is not supported, vkCreateDevice must return
* VK_ERROR_FEATURE_NOT_PRESENT. Support for extensions can be checked before creating a device by querying vkEnumerateDeviceExtensionProperties
* After verifying and enabling the extensions the VkDevice object is created and returned to the application.
* If a requested extension is only supported by a layer, both the layer and the extension need to be specified at vkCreateInstance
* time for the creation to succeed. Multiple logical devices can be created from the same physical device. Logical device creation may
* fail due to lack of device-specific resources (in addition to the other errors). If that occurs, vkCreateDevice will return VK_ERROR_TOO_MANY_OBJECTS.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice(
VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkDevice* pDevice)
{
assert(physicalDevice);
assert(pDevice);
assert(pCreateInfo);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
*pDevice = malloc(sizeof(_device));
if(!*pDevice)
{
return VK_ERROR_TOO_MANY_OBJECTS;
}
(*pDevice)->dev = physicalDevice;
(*pDevice)->numEnabledExtensions = 0;
for(int c = 0; c < pCreateInfo->enabledExtensionCount; ++c)
{
int findres = findDeviceExtension(pCreateInfo->ppEnabledExtensionNames[c]);
if(findres > -1)
{
(*pDevice)->enabledExtensions[(*pDevice)->numEnabledExtensions] = findres;
(*pDevice)->numEnabledExtensions++;
}
else
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
}
const VkBool32* requestedFeatures = (const VkBool32*)pCreateInfo->pEnabledFeatures;
const VkBool32* supportedFeatures = (const VkBool32*)&_features; //treat the feature structs as arrays of VkBool32
if(requestedFeatures)
{
for(int c = 0; c < numFeatures; ++c)
{
if(requestedFeatures[c] && !supportedFeatures[c])
{
return VK_ERROR_FEATURE_NOT_PRESENT;
}
}
(*pDevice)->enabledFeatures = *pCreateInfo->pEnabledFeatures;
}
else
{
memset(&(*pDevice)->enabledFeatures, 0, sizeof((*pDevice)->enabledFeatures)); //just disable everything
}
//layers ignored per spec
//pCreateInfo->enabledLayerCount
for(int c = 0; c < numQueueFamilies; ++c)
{
(*pDevice)->queues[c] = 0;
}
if(pCreateInfo->queueCreateInfoCount > 0)
{
for(int c = 0; c < pCreateInfo->queueCreateInfoCount; ++c)
{
(*pDevice)->queues[pCreateInfo->pQueueCreateInfos[c].queueFamilyIndex] = malloc(sizeof(_queue)*pCreateInfo->pQueueCreateInfos[c].queueCount);
if(!(*pDevice)->queues[pCreateInfo->pQueueCreateInfos[c].queueFamilyIndex])
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
for(int d = 0; d < pCreateInfo->pQueueCreateInfos[c].queueCount; ++d)
{
(*pDevice)->queues[pCreateInfo->pQueueCreateInfos[c].queueFamilyIndex][d].lastEmitSeqno = 0;
}
(*pDevice)->numQueues[pCreateInfo->pQueueCreateInfos[c].queueFamilyIndex] = pCreateInfo->pQueueCreateInfos[c].queueCount;
}
}
return VK_SUCCESS;
}
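//Illustrative application-side sketch only, not part of the driver: minimal device creation
//requesting a single queue from family 0, matching the queue setup loop above.
static void deviceCreationExample(VkPhysicalDevice physicalDevice, VkDevice* outDevice)
{
float priority = 1.0f;
VkDeviceQueueCreateInfo queueInfo = {
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
.queueFamilyIndex = 0,
.queueCount = 1,
.pQueuePriorities = &priority,
};
VkDeviceCreateInfo createInfo = {
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
.queueCreateInfoCount = 1,
.pQueueCreateInfos = &queueInfo,
};
vkCreateDevice(physicalDevice, &createInfo, 0, outDevice);
}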
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetDeviceQueue
* vkGetDeviceQueue must only be used to get queues that were created with the flags parameter of VkDeviceQueueCreateInfo set to zero.
* To get queues that were created with a non-zero flags parameter use vkGetDeviceQueue2.
*/
VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue(
VkDevice device,
uint32_t queueFamilyIndex,
uint32_t queueIndex,
VkQueue* pQueue)
{
assert(device);
assert(pQueue);
assert(queueFamilyIndex < numQueueFamilies);
assert(queueIndex < device->numQueues[queueFamilyIndex]);
*pQueue = &device->queues[queueFamilyIndex][queueIndex];
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyDevice
* To ensure that no work is active on the device, vkDeviceWaitIdle can be used to gate the destruction of the device.
* Prior to destroying a device, an application is responsible for destroying/freeing any Vulkan objects that were created using that device as the
* first parameter of the corresponding vkCreate* or vkAllocate* command
*/
VKAPI_ATTR void VKAPI_CALL vkDestroyDevice(
VkDevice device,
const VkAllocationCallbacks* pAllocator)
{
assert(device);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//TODO
}

View File

@ -1,1938 +0,0 @@
#include <stdio.h>
#include "CustomAssert.h"
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdint.h>
#include <pthread.h>
#include <semaphore.h>
#include <vulkan/vulkan.h>
#include "vkExt.h"
#include "modeset.h"
#include "kernelInterface.h"
#include "ControlListUtil.h"
#include "AlignedAllocator.h"
#include "PoolAllocator.h"
#include "ConsecutivePoolAllocator.h"
#include "LinearAllocator.h"
#include "kernel/vc4_packet.h"
#include "../brcm/cle/v3d_decoder.h"
#include "../brcm/clif/clif_dump.h"
#ifndef min
#define min(a, b) (a < b ? a : b)
#endif
#ifndef max
#define max(a, b) (a > b ? a : b)
#endif
#include "vkCaps.h"
typedef struct VkPhysicalDevice_T
{
//hardware id?
int dummy;
} _physicalDevice;
typedef struct VkQueue_T
{
uint64_t lastEmitSeqno;
} _queue;
typedef struct VkCommandPool_T
{
PoolAllocator pa;
ConsecutivePoolAllocator cpa;
uint32_t queueFamilyIndex;
} _commandPool;
typedef enum commandBufferState
{
CMDBUF_STATE_INITIAL = 0,
CMDBUF_STATE_RECORDING,
CMDBUF_STATE_EXECUTABLE,
CMDBUF_STATE_PENDING,
CMDBUF_STATE_INVALID,
CMDBUF_STATE_LAST
} commandBufferState;
typedef struct VkCommandBuffer_T
{
//Recorded commands include commands to bind pipelines and descriptor sets to the command buffer, commands to modify dynamic state, commands to draw (for graphics rendering),
//commands to dispatch (for compute), commands to execute secondary command buffers (for primary command buffers only), commands to copy buffers and images, and other commands
struct drm_vc4_submit_cl submitCl;
ControlList binCl;
ControlList shaderRecCl;
uint32_t shaderRecCount;
ControlList uniformsCl;
ControlList handlesCl;
commandBufferState state;
VkCommandBufferUsageFlags usageFlags;
_commandPool* cp;
} _commandBuffer;
typedef struct VkInstance_T
{
//supposedly this should contain all the enabled layers?
int enabledExtensions[numInstanceExtensions];
int numEnabledExtensions;
_physicalDevice dev;
int chipVersion;
int hasTiling;
int hasControlFlow;
int hasEtc1;
int hasThreadedFs;
int hasMadvise;
} _instance;
typedef struct VkDevice_T
{
int enabledExtensions[numDeviceExtensions];
int numEnabledExtensions;
VkPhysicalDeviceFeatures enabledFeatures;
_physicalDevice* dev;
_queue* queues[numQueueFamilies];
int numQueues[numQueueFamilies];
} _device;
typedef struct VkSwapchain_T
{
_image* images;
uint32_t numImages;
uint32_t backbufferIdx;
VkSurfaceKHR surface;
} _swapchain;
void clFit(VkCommandBuffer cb, ControlList* cl, uint32_t commandSize)
{
if(!clHasEnoughSpace(cl, commandSize))
{
uint32_t currSize = clSize(cl);
cl->buffer = consecutivePoolReAllocate(&cb->cp->cpa, cl->buffer, cl->numBlocks); assert(cl->buffer);
cl->nextFreeByte = cl->buffer + currSize;
}
}
void clDump(void* cl, uint32_t size)
{
struct v3d_device_info devinfo = {
/* While the driver supports V3D 2.1 and 2.6, we haven't split
* off a 2.6 XML yet (there are a couple of fields different
* in render target formatting)
*/
.ver = 21,
};
struct v3d_spec* spec = v3d_spec_load(&devinfo);
struct clif_dump *clif = clif_dump_init(&devinfo, stderr, true);
uint32_t offset = 0, hw_offset = 0;
uint8_t *p = cl;
while (offset < size) {
struct v3d_group *inst = v3d_spec_find_instruction(spec, p);
uint8_t header = *p;
uint32_t length;
if (inst == NULL) {
printf("0x%08x 0x%08x: Unknown packet 0x%02x (%d)!\n",
offset, hw_offset, header, header);
return;
}
length = v3d_group_get_length(inst);
printf("0x%08x 0x%08x: 0x%02x %s\n",
offset, hw_offset, header, v3d_group_get_name(inst));
v3d_print_group(clif, inst, offset, p);
switch (header) {
case VC4_PACKET_HALT:
case VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF:
return;
default:
break;
}
offset += length;
if (header != VC4_PACKET_GEM_HANDLES)
hw_offset += length;
p += length;
}
clif_dump_destroy(clif);
}
//Textures in T format:
//formed out of 4KB tiles, which have 1KB subtiles (see page 105 in VC4 arch guide)
//1KB subtiles have 512b microtiles.
//Width/height of the 512b microtiles is the following:
// 64bpp: 2x4
// 32bpp: 4x4
// 16bpp: 8x4
// 8bpp: 8x8
// 4bpp: 16x8
// 1bpp: 32x16
//Therefore width/height of 1KB subtiles is the following:
// 64bpp: 8x16
// 32bpp: 16x16
// 16bpp: 32x16
// 8bpp: 32x32
// 4bpp: 64x32
// 1bpp: 128x64
//Finally width/height of the 4KB tiles:
// 64bpp: 16x32
// 32bpp: 32x32
// 16bpp: 64x32
// 8bpp: 64x64
// 4bpp: 128x64
// 1bpp: 256x128
void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp, uint32_t* paddedWidth, uint32_t* paddedHeight)
{
assert(paddedWidth);
assert(paddedHeight);
uint32_t tileW = 0;
uint32_t tileH = 0;
switch(bpp)
{
case 64:
{
tileW = 16;
tileH = 32;
break;
}
case 32:
{
tileW = 32;
tileH = 32;
break;
}
case 16:
{
tileW = 64;
tileH = 32;
break;
}
case 8:
{
tileW = 64;
tileH = 64;
break;
}
case 4:
{
tileW = 128;
tileH = 64;
break;
}
case 1:
{
tileW = 256;
tileH = 128;
break;
}
default:
{
assert(0); //unsupported
}
}
*paddedWidth = ((tileW - (width % tileW)) % tileW) + width;
*paddedHeight = ((tileH - (height % tileH)) % tileH) + height;
}
uint32_t getFormatBpp(VkFormat f)
{
switch(f)
{
case VK_FORMAT_R16G16B16A16_SFLOAT:
return 64;
case VK_FORMAT_R8G8B8_UNORM: //padded to 32
case VK_FORMAT_R8G8B8A8_UNORM:
return 32;
return 32;
case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
case VK_FORMAT_R5G6B5_UNORM_PACK16:
case VK_FORMAT_R8G8_UNORM:
case VK_FORMAT_R16_SFLOAT:
case VK_FORMAT_R16_SINT:
return 16;
case VK_FORMAT_R8_UNORM:
case VK_FORMAT_R8_SINT:
return 8;
default:
assert(0);
return 0;
}
}
void createImageBO(_image* i)
{
assert(i);
assert(i->format);
assert(i->width);
assert(i->height);
uint32_t bpp = getFormatBpp(i->format);
uint32_t pixelSizeBytes = bpp / 8;
uint32_t nonPaddedSize = i->width * i->height * pixelSizeBytes;
i->paddedWidth = i->width;
i->paddedHeight = i->height;
//need to pad to T format, as HW automatically chooses that
if(nonPaddedSize > 4096)
{
getPaddedTextureDimensionsT(i->width, i->height, bpp, &i->paddedWidth, &i->paddedHeight);
}
i->size = i->paddedWidth * i->paddedHeight * pixelSizeBytes;
i->stride = i->paddedWidth * pixelSizeBytes;
i->handle = vc4_bo_alloc(controlFd, i->size, "swapchain image"); assert(i->handle);
//set tiling to T if size > 4KB
if(nonPaddedSize > 4096)
{
int ret = vc4_bo_set_tiling(controlFd, i->handle, DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED); assert(ret);
i->tiling = VC4_TILING_FORMAT_T;
}
else
{
int ret = vc4_bo_set_tiling(controlFd, i->handle, DRM_FORMAT_MOD_LINEAR); assert(ret);
i->tiling = VC4_TILING_FORMAT_LT;
}
}
/*static inline void util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc)
{
ubyte r = 0;
ubyte g = 0;
ubyte b = 0;
ubyte a = 0;
if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) <= 8) {
r = float_to_ubyte(rgba[0]);
g = float_to_ubyte(rgba[1]);
b = float_to_ubyte(rgba[2]);
a = float_to_ubyte(rgba[3]);
}
switch (format) {
case PIPE_FORMAT_ABGR8888_UNORM:
{
uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | a;
}
return;
case PIPE_FORMAT_XBGR8888_UNORM:
{
uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | 0xff;
}
return;
case PIPE_FORMAT_BGRA8888_UNORM:
{
uc->ui[0] = (a << 24) | (r << 16) | (g << 8) | b;
}
return;
case PIPE_FORMAT_BGRX8888_UNORM:
{
uc->ui[0] = (0xffu << 24) | (r << 16) | (g << 8) | b;
}
return;
case PIPE_FORMAT_ARGB8888_UNORM:
{
uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | a;
}
return;
case PIPE_FORMAT_XRGB8888_UNORM:
{
uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | 0xff;
}
return;
case PIPE_FORMAT_B5G6R5_UNORM:
{
uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
}
return;
case PIPE_FORMAT_B5G5R5X1_UNORM:
{
uc->us = ((0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
}
return;
case PIPE_FORMAT_B5G5R5A1_UNORM:
{
uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
}
return;
case PIPE_FORMAT_B4G4R4A4_UNORM:
{
uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
}
return;
case PIPE_FORMAT_A8_UNORM:
{
uc->ub = a;
}
return;
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
{
uc->ub = r;
}
return;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
{
uc->f[0] = rgba[0];
uc->f[1] = rgba[1];
uc->f[2] = rgba[2];
uc->f[3] = rgba[3];
}
return;
case PIPE_FORMAT_R32G32B32_FLOAT:
{
uc->f[0] = rgba[0];
uc->f[1] = rgba[1];
uc->f[2] = rgba[2];
}
return;
default:
util_format_write_4f(format, rgba, 0, uc, 0, 0, 0, 1, 1);
}
}*/
uint32_t packVec4IntoABGR8(const float rgba[4])
{
uint8_t r, g, b, a;
r = rgba[0] * 255.0;
g = rgba[1] * 255.0;
b = rgba[2] * 255.0;
a = rgba[3] * 255.0;
uint32_t res = 0 |
(a << 24) |
(b << 16) |
(g << 8) |
(r << 0);
return res;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEnumerateInstanceExtensionProperties
* When pLayerName parameter is NULL, only extensions provided by the Vulkan implementation or by implicitly enabled layers are returned. When pLayerName is the name of a layer,
* the instance extensions provided by that layer are returned.
* If pProperties is NULL, then the number of extensions properties available is returned in pPropertyCount. Otherwise, pPropertyCount must point to a variable set by the user
* to the number of elements in the pProperties array, and on return the variable is overwritten with the number of structures actually written to pProperties.
* If pPropertyCount is less than the number of extension properties available, at most pPropertyCount structures will be written. If pPropertyCount is smaller than the number of extensions available,
* VK_INCOMPLETE will be returned instead of VK_SUCCESS, to indicate that not all the available properties were returned.
* Because the list of available layers may change externally between calls to vkEnumerateInstanceExtensionProperties,
* two calls may retrieve different results if a pLayerName is available in one call but not in another. The extensions supported by a layer may also change between two calls,
* e.g. if the layer implementation is replaced by a different version between those calls.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceExtensionProperties(
const char* pLayerName,
uint32_t* pPropertyCount,
VkExtensionProperties* pProperties)
{
assert(!pLayerName); //TODO layers ignored for now
assert(pPropertyCount);
if(!pProperties)
{
*pPropertyCount = numInstanceExtensions;
return VK_INCOMPLETE;
}
int arraySize = *pPropertyCount;
int elementsWritten = min(numInstanceExtensions, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pProperties[c] = instanceExtensions[c];
}
*pPropertyCount = elementsWritten;
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateInstance
* There is no global state in Vulkan and all per-application state is stored in a VkInstance object. Creating a VkInstance object initializes the Vulkan library
* vkCreateInstance verifies that the requested layers exist. If not, vkCreateInstance will return VK_ERROR_LAYER_NOT_PRESENT. Next vkCreateInstance verifies that
* the requested extensions are supported (e.g. in the implementation or in any enabled instance layer) and if any requested extension is not supported,
* vkCreateInstance must return VK_ERROR_EXTENSION_NOT_PRESENT. After verifying and enabling the instance layers and extensions the VkInstance object is
* created and returned to the application.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance(
const VkInstanceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkInstance* pInstance)
{
assert(pInstance);
assert(pCreateInfo);
*pInstance = malloc(sizeof(_instance));
if(!*pInstance)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
(*pInstance)->numEnabledExtensions = 0;
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//TODO: possibly we need to load layers here
//and store them in pInstance
assert(pCreateInfo->enabledLayerCount == 0);
if(pCreateInfo->enabledExtensionCount)
{
assert(pCreateInfo->ppEnabledExtensionNames);
}
for(int c = 0; c < pCreateInfo->enabledExtensionCount; ++c)
{
int findres = findInstanceExtension(pCreateInfo->ppEnabledExtensionNames[c]);
if(findres > -1)
{
(*pInstance)->enabledExtensions[(*pInstance)->numEnabledExtensions] = findres;
(*pInstance)->numEnabledExtensions++;
}
else
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
}
//TODO ignored for now
//pCreateInfo->pApplicationInfo
int ret = openIoctl(); assert(!ret);
(*pInstance)->chipVersion = vc4_get_chip_info(controlFd);
(*pInstance)->hasTiling = vc4_test_tiling(controlFd);
(*pInstance)->hasControlFlow = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_BRANCHES);
(*pInstance)->hasEtc1 = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_ETC1);
(*pInstance)->hasThreadedFs = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
(*pInstance)->hasMadvise = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_MADVISE);
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#devsandqueues-physical-device-enumeration
* If pPhysicalDevices is NULL, then the number of physical devices available is returned in pPhysicalDeviceCount. Otherwise, pPhysicalDeviceCount must point to a
* variable set by the user to the number of elements in the pPhysicalDevices array, and on return the variable is overwritten with the number of handles actually
* written to pPhysicalDevices. If pPhysicalDeviceCount is less than the number of physical devices available, at most pPhysicalDeviceCount structures will be written.
* If pPhysicalDeviceCount is smaller than the number of physical devices available, VK_INCOMPLETE will be returned instead of VK_SUCCESS, to indicate that not all the
* available physical devices were returned.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices(
VkInstance instance,
uint32_t* pPhysicalDeviceCount,
VkPhysicalDevice* pPhysicalDevices)
{
assert(instance);
//TODO is there a way to check if there's a gpu (and it's the rPi)?
int gpuExists = access( "/dev/dri/card0", F_OK ) != -1;
int numGPUs = gpuExists;
assert(pPhysicalDeviceCount);
if(!pPhysicalDevices)
{
*pPhysicalDeviceCount = numGPUs;
return VK_SUCCESS;
}
int arraySize = *pPhysicalDeviceCount;
int elementsWritten = min(numGPUs, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pPhysicalDevices[c] = &instance->dev;
}
*pPhysicalDeviceCount = elementsWritten;
if(elementsWritten < arraySize)
{
return VK_INCOMPLETE;
}
else
{
return VK_SUCCESS;
}
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceProperties
*/
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties* pProperties)
{
assert(physicalDevice);
assert(pProperties);
VkPhysicalDeviceSparseProperties sparseProps =
{
.residencyStandard2DBlockShape = 1,
.residencyStandard2DMultisampleBlockShape = 1,
.residencyStandard3DBlockShape = 1,
.residencyAlignedMipSize = 1,
.residencyNonResidentStrict = 1
};
pProperties->apiVersion = VK_MAKE_VERSION(1,1,0);
pProperties->driverVersion = 1; //we'll simply call this v1
pProperties->vendorID = 0x14E4; //Broadcom
pProperties->deviceID = 0; //TODO dunno?
pProperties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
strcpy(pProperties->deviceName, "VideoCore IV HW");
//pProperties->pipelineCacheUUID
pProperties->limits = _limits;
pProperties->sparseProperties = sparseProps;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceFeatures
*/
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFeatures(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures* pFeatures)
{
assert(physicalDevice);
assert(pFeatures);
*pFeatures = _features;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEnumerateDeviceExtensionProperties
*/
VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceExtensionProperties(
VkPhysicalDevice physicalDevice,
const char* pLayerName,
uint32_t* pPropertyCount,
VkExtensionProperties* pProperties)
{
assert(physicalDevice);
assert(!pLayerName); //layers ignored for now
assert(pPropertyCount);
if(!pProperties)
{
*pPropertyCount = numDeviceExtensions;
return VK_INCOMPLETE;
}
int arraySize = *pPropertyCount;
int elementsWritten = min(numDeviceExtensions, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pProperties[c] = deviceExtensions[c];
}
*pPropertyCount = elementsWritten;
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceQueueFamilyProperties
* If pQueueFamilyProperties is NULL, then the number of queue families available is returned in pQueueFamilyPropertyCount.
* Otherwise, pQueueFamilyPropertyCount must point to a variable set by the user to the number of elements in the pQueueFamilyProperties array,
* and on return the variable is overwritten with the number of structures actually written to pQueueFamilyProperties. If pQueueFamilyPropertyCount
* is less than the number of queue families available, at most pQueueFamilyPropertyCount structures will be written.
*/
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyProperties(
VkPhysicalDevice physicalDevice,
uint32_t* pQueueFamilyPropertyCount,
VkQueueFamilyProperties* pQueueFamilyProperties)
{
assert(physicalDevice);
assert(pQueueFamilyPropertyCount);
if(!pQueueFamilyProperties)
{
*pQueueFamilyPropertyCount = 1;
return;
}
int arraySize = *pQueueFamilyPropertyCount;
int elementsWritten = min(numQueueFamilies, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pQueueFamilyProperties[c] = _queueFamilyProperties[c];
}
*pQueueFamilyPropertyCount = elementsWritten;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceSupportKHR
* does this queue family support presentation to this surface?
*/
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR(
VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
VkSurfaceKHR surface,
VkBool32* pSupported)
{
assert(pSupported);
assert(surface);
assert(physicalDevice);
assert(queueFamilyIndex < numQueueFamilies);
//TODO if we plan to support headless rendering, there should be 2 families
//one using /dev/dri/card0 which has modesetting
//other using /dev/dri/renderD128 which does not support modesetting, this would say false here
*pSupported = VK_TRUE;
return VK_SUCCESS;
}
/*
* Implementation of our RPI specific "extension"
*/
VkResult vkCreateRpiSurfaceKHR(
VkInstance instance,
const VkRpiSurfaceCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSurfaceKHR* pSurface)
{
assert(instance);
//assert(pCreateInfo); //ignored for now
assert(pSurface);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
*pSurface = (VkSurfaceKHR)modeset_create(controlFd);
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySurfaceKHR
* Destroying a VkSurfaceKHR merely severs the connection between Vulkan and the native surface,
* and does not imply destroying the native surface, closing a window, or similar behavior
* (but we'll do so anyways...)
*/
VKAPI_ATTR void VKAPI_CALL vkDestroySurfaceKHR(
VkInstance instance,
VkSurfaceKHR surface,
const VkAllocationCallbacks* pAllocator)
{
assert(instance);
assert(surface);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
modeset_destroy(controlFd, (modeset_dev*)surface);
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateDevice
* vkCreateDevice verifies that extensions and features requested in the ppEnabledExtensionNames and pEnabledFeatures
* members of pCreateInfo, respectively, are supported by the implementation. If any requested extension is not supported,
* vkCreateDevice must return VK_ERROR_EXTENSION_NOT_PRESENT. If any requested feature is not supported, vkCreateDevice must return
* VK_ERROR_FEATURE_NOT_PRESENT. Support for extensions can be checked before creating a device by querying vkEnumerateDeviceExtensionProperties.
* After verifying and enabling the extensions the VkDevice object is created and returned to the application.
* If a requested extension is only supported by a layer, both the layer and the extension need to be specified at vkCreateInstance
* time for the creation to succeed. Multiple logical devices can be created from the same physical device. Logical device creation may
* fail due to lack of device-specific resources (in addition to the other errors). If that occurs, vkCreateDevice will return VK_ERROR_TOO_MANY_OBJECTS.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice(
VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkDevice* pDevice)
{
assert(physicalDevice);
assert(pDevice);
assert(pCreateInfo);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
*pDevice = malloc(sizeof(_device));
if(!*pDevice) //check the allocation, not the output pointer
{
return VK_ERROR_TOO_MANY_OBJECTS;
}
(*pDevice)->dev = physicalDevice;
(*pDevice)->numEnabledExtensions = 0; //must start at zero, the loop below increments it
for(int c = 0; c < pCreateInfo->enabledExtensionCount; ++c)
{
int findres = findDeviceExtension(pCreateInfo->ppEnabledExtensionNames[c]);
if(findres > -1)
{
(*pDevice)->enabledExtensions[(*pDevice)->numEnabledExtensions] = findres;
(*pDevice)->numEnabledExtensions++;
}
else
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
}
//VkPhysicalDeviceFeatures is a struct of VkBool32 members, so it can be walked as an array
VkBool32* requestedFeatures = (VkBool32*)pCreateInfo->pEnabledFeatures;
VkBool32* supportedFeatures = (VkBool32*)&_features;
if(requestedFeatures)
{
for(int c = 0; c < numFeatures; ++c)
{
if(requestedFeatures[c] && !supportedFeatures[c])
{
return VK_ERROR_FEATURE_NOT_PRESENT;
}
}
(*pDevice)->enabledFeatures = *pCreateInfo->pEnabledFeatures;
}
else
{
memset(&(*pDevice)->enabledFeatures, 0, sizeof((*pDevice)->enabledFeatures)); //just disable everything
}
//layers ignored per spec
//pCreateInfo->enabledLayerCount
for(int c = 0; c < numQueueFamilies; ++c)
{
(*pDevice)->queues[c] = 0;
}
if(pCreateInfo->queueCreateInfoCount > 0)
{
for(int c = 0; c < pCreateInfo->queueCreateInfoCount; ++c)
{
(*pDevice)->queues[pCreateInfo->pQueueCreateInfos[c].queueFamilyIndex] = malloc(sizeof(_queue)*pCreateInfo->pQueueCreateInfos[c].queueCount);
if(!(*pDevice)->queues[pCreateInfo->pQueueCreateInfos[c].queueFamilyIndex])
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
for(int d = 0; d < pCreateInfo->pQueueCreateInfos[c].queueCount; ++d)
{
(*pDevice)->queues[pCreateInfo->pQueueCreateInfos[c].queueFamilyIndex][d].lastEmitSeqno = 0;
}
(*pDevice)->numQueues[pCreateInfo->pQueueCreateInfos[c].queueFamilyIndex] = pCreateInfo->pQueueCreateInfos[c].queueCount;
}
}
return VK_SUCCESS;
}
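//Example (usage sketch, not part of the driver): minimal application-side device creation against
//the implementation above: a single queue from family 0, no extensions, no features. Error
//handling is omitted and the function name is illustrative only.
#if 0
VkDevice exampleCreateDevice(VkPhysicalDevice physicalDevice)
{
	float priority = 1.0f;
	VkDeviceQueueCreateInfo queueInfo = {0};
	queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
	queueInfo.queueFamilyIndex = 0;
	queueInfo.queueCount = 1;
	queueInfo.pQueuePriorities = &priority;

	VkDeviceCreateInfo createInfo = {0};
	createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
	createInfo.queueCreateInfoCount = 1;
	createInfo.pQueueCreateInfos = &queueInfo;

	VkDevice device;
	vkCreateDevice(physicalDevice, &createInfo, 0, &device);

	VkQueue queue;
	vkGetDeviceQueue(device, 0, 0, &queue); //flags were zero, so vkGetDeviceQueue is the right call
	return device;
}
#endif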
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetDeviceQueue
* vkGetDeviceQueue must only be used to get queues that were created with the flags parameter of VkDeviceQueueCreateInfo set to zero.
* To get queues that were created with a non-zero flags parameter use vkGetDeviceQueue2.
*/
VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue(
VkDevice device,
uint32_t queueFamilyIndex,
uint32_t queueIndex,
VkQueue* pQueue)
{
assert(device);
assert(pQueue);
assert(queueFamilyIndex < numQueueFamilies);
assert(queueIndex < device->numQueues[queueFamilyIndex]);
*pQueue = &device->queues[queueFamilyIndex][queueIndex];
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSemaphore
* Semaphores are a synchronization primitive that can be used to insert a dependency between batches submitted to queues.
* Semaphores have two states - signaled and unsignaled. The state of a semaphore can be signaled after execution of a batch of commands is completed.
* A batch can wait for a semaphore to become signaled before it begins execution, and the semaphore is also unsignaled before the batch begins execution.
* As with most objects in Vulkan, semaphores are an interface to internal data which is typically opaque to applications.
* This internal data is referred to as a semaphore's payload. However, in order to enable communication with agents outside of the current device,
* it is necessary to be able to export that payload to a commonly understood format, and subsequently import from that format as well.
* The internal data of a semaphore may include a reference to any resources and pending work associated with signal or unsignal operations performed on that semaphore object.
* Mechanisms to import and export that internal data to and from semaphores are provided below.
* These mechanisms indirectly enable applications to share semaphore state between two or more semaphores and other synchronization primitives across process and API boundaries.
* When created, the semaphore is in the unsignaled state.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore(
VkDevice device,
const VkSemaphoreCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSemaphore* pSemaphore)
{
assert(device);
assert(pSemaphore);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//we'll probably just use an IOCTL to wait for a GPU sequence number to complete.
sem_t* s = malloc(sizeof(sem_t));
if(!s)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
sem_init(s, 0, 0); //create semaphore unsignalled, shared between threads
*pSemaphore = (VkSemaphore)s;
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceCapabilitiesKHR
* The capabilities of a swapchain targeting a surface are the intersection of the capabilities of the WSI platform,
* the native window or display, and the physical device. The resulting capabilities can be obtained with the queries listed
* below in this section. Capabilities that correspond to image creation parameters are not independent of each other:
* combinations of parameters that are not supported as reported by vkGetPhysicalDeviceImageFormatProperties are not supported
* by the surface on that physical device, even if the capabilities taken individually are supported as part of some other parameter combinations.
*
* capabilities the specified device supports for a swapchain created for the surface
*/
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilitiesKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities)
{
assert(physicalDevice);
assert(surface);
assert(pSurfaceCapabilities);
pSurfaceCapabilities->minImageCount = 1; //min 1
pSurfaceCapabilities->maxImageCount = 2; //TODO max 2 for double buffering for now...
pSurfaceCapabilities->currentExtent.width = ((modeset_dev*)surface)->width;
pSurfaceCapabilities->currentExtent.height = ((modeset_dev*)surface)->height;
pSurfaceCapabilities->minImageExtent.width = ((modeset_dev*)surface)->width; //TODO
pSurfaceCapabilities->minImageExtent.height = ((modeset_dev*)surface)->height; //TODO
pSurfaceCapabilities->maxImageExtent.width = ((modeset_dev*)surface)->width; //TODO
pSurfaceCapabilities->maxImageExtent.height = ((modeset_dev*)surface)->height; //TODO
pSurfaceCapabilities->maxImageArrayLayers = 1; //TODO maybe more layers for cursor etc.
pSurfaceCapabilities->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; //TODO no rotation for now
pSurfaceCapabilities->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; //TODO get this from dev
pSurfaceCapabilities->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; //TODO no alpha compositing for now
pSurfaceCapabilities->supportedUsageFlags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; //well we want to draw on the screen right
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceFormatsKHR
* If pSurfaceFormats is NULL, then the number of format pairs supported for the given surface is returned in pSurfaceFormatCount.
* The number of format pairs supported will be greater than or equal to 1. Otherwise, pSurfaceFormatCount must point to a variable
* set by the user to the number of elements in the pSurfaceFormats array, and on return the variable is overwritten with the number
* of structures actually written to pSurfaceFormats. If the value of pSurfaceFormatCount is less than the number of format pairs supported,
* at most pSurfaceFormatCount structures will be written. If pSurfaceFormatCount is smaller than the number of format pairs supported for the given surface,
* VK_INCOMPLETE will be returned instead of VK_SUCCESS to indicate that not all the available values were returned.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormatsKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
uint32_t* pSurfaceFormatCount,
VkSurfaceFormatKHR* pSurfaceFormats)
{
assert(physicalDevice);
assert(surface);
assert(pSurfaceFormatCount);
const int numFormats = 1;
if(!pSurfaceFormats)
{
*pSurfaceFormatCount = numFormats;
return VK_SUCCESS;
}
int arraySize = *pSurfaceFormatCount;
int elementsWritten = min(numFormats, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pSurfaceFormats[c] = supportedSurfaceFormats[c];
}
*pSurfaceFormatCount = elementsWritten;
if(elementsWritten < numFormats)
{
return VK_INCOMPLETE;
}
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfacePresentModesKHR
* If pPresentModes is NULL, then the number of presentation modes supported for the given surface is returned in pPresentModeCount.
* Otherwise, pPresentModeCount must point to a variable set by the user to the number of elements in the pPresentModes array,
* and on return the variable is overwritten with the number of values actually written to pPresentModes.
* If the value of pPresentModeCount is less than the number of presentation modes supported, at most pPresentModeCount values will be written.
* If pPresentModeCount is smaller than the number of presentation modes supported for the given surface, VK_INCOMPLETE will be returned instead of
* VK_SUCCESS to indicate that not all the available values were returned.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
uint32_t* pPresentModeCount,
VkPresentModeKHR* pPresentModes)
{
assert(physicalDevice);
assert(surface);
assert(pPresentModeCount);
const int numModes = 1;
if(!pPresentModes)
{
*pPresentModeCount = numModes;
return VK_SUCCESS;
}
int arraySize = *pPresentModeCount;
int elementsWritten = min(numModes, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
//TODO
pPresentModes[c] = VK_PRESENT_MODE_FIFO_KHR;
}
*pPresentModeCount = elementsWritten;
if(elementsWritten < numModes)
{
return VK_INCOMPLETE;
}
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSwapchainKHR
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR(
VkDevice device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSwapchainKHR* pSwapchain)
{
assert(device);
assert(pCreateInfo);
assert(pSwapchain);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
*pSwapchain = malloc(sizeof(_swapchain));
if(!*pSwapchain)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
_swapchain* s = *pSwapchain;
//TODO flags, layers, queue sharing, pretransform, composite alpha, present mode..., clipped, oldswapchain
//TODO external sync on surface, oldswapchain
s->images = malloc(sizeof(_image) * pCreateInfo->minImageCount);
if(!s->images)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
s->backbufferIdx = 0;
s->numImages = pCreateInfo->minImageCount;
s->surface = pCreateInfo->surface;
for(int c = 0; c < pCreateInfo->minImageCount; ++c)
{
s->images[c].width = pCreateInfo->imageExtent.width;
s->images[c].height = pCreateInfo->imageExtent.height;
s->images[c].depth = 1;
s->images[c].layers = pCreateInfo->imageArrayLayers;
s->images[c].miplevels = 1;
s->images[c].samples = 1; //TODO
s->images[c].usageBits = pCreateInfo->imageUsage;
s->images[c].format = pCreateInfo->imageFormat;
s->images[c].imageSpace = pCreateInfo->imageColorSpace;
s->images[c].concurrentAccess = pCreateInfo->imageSharingMode;
s->images[c].numQueueFamiliesWithAccess = pCreateInfo->queueFamilyIndexCount;
if(s->images[c].concurrentAccess)
{
s->images[c].queueFamiliesWithAccess = malloc(sizeof(uint32_t)*s->images[c].numQueueFamiliesWithAccess);
memcpy(s->images[c].queueFamiliesWithAccess, pCreateInfo->pQueueFamilyIndices, sizeof(uint32_t)*s->images[c].numQueueFamiliesWithAccess);
}
s->images[c].preTransformMode = pCreateInfo->preTransform;
s->images[c].compositeAlpha = pCreateInfo->compositeAlpha;
s->images[c].presentMode = pCreateInfo->presentMode;
s->images[c].clipped = pCreateInfo->clipped;
createImageBO(&s->images[c]);
int res = modeset_create_fb(controlFd, &s->images[c]); assert(res == 0);
}
//defer to first swapbuffer (or at least later, getting swapchain != presenting immediately)
//int res = modeset_fb_for_dev(controlFd, s->surface, &s->images[s->backbufferIdx]); assert(res == 0);
return VK_SUCCESS;
}
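//Example (usage sketch, not part of the driver): filling VkSwapchainCreateInfoKHR from the surface
//queries above. The implementation currently reports exactly one surface format and one present
//mode, so the single-element query below is enough; names are illustrative only.
#if 0
VkSwapchainKHR exampleCreateSwapchain(VkPhysicalDevice physicalDevice, VkDevice device, VkSurfaceKHR surface)
{
	VkSurfaceCapabilitiesKHR caps;
	vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physicalDevice, surface, &caps);

	uint32_t formatCount = 1;
	VkSurfaceFormatKHR format;
	vkGetPhysicalDeviceSurfaceFormatsKHR(physicalDevice, surface, &formatCount, &format);

	VkSwapchainCreateInfoKHR createInfo = {0};
	createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
	createInfo.surface = surface;
	createInfo.minImageCount = caps.minImageCount;
	createInfo.imageFormat = format.format;
	createInfo.imageColorSpace = format.colorSpace;
	createInfo.imageExtent = caps.currentExtent;
	createInfo.imageArrayLayers = 1;
	//TRANSFER_DST is requested so the images can be cleared with vkCmdClearColorImage
	createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
	createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
	createInfo.preTransform = caps.currentTransform;
	createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
	createInfo.presentMode = VK_PRESENT_MODE_FIFO_KHR;
	createInfo.clipped = VK_TRUE;

	VkSwapchainKHR swapchain;
	vkCreateSwapchainKHR(device, &createInfo, 0, &swapchain);
	return swapchain;
}
#endif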
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetSwapchainImagesKHR
* If pSwapchainImages is NULL, then the number of presentable images for swapchain is returned in pSwapchainImageCount.
* Otherwise, pSwapchainImageCount must point to a variable set by the user to the number of elements in the pSwapchainImages array,
* and on return the variable is overwritten with the number of structures actually written to pSwapchainImages.
* If the value of pSwapchainImageCount is less than the number of presentable images for swapchain, at most pSwapchainImageCount structures will be written.
* If pSwapchainImageCount is smaller than the number of presentable images for swapchain, VK_INCOMPLETE will be returned instead of VK_SUCCESS to
* indicate that not all the available values were returned.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainImagesKHR(
VkDevice device,
VkSwapchainKHR swapchain,
uint32_t* pSwapchainImageCount,
VkImage* pSwapchainImages)
{
assert(device);
assert(swapchain);
assert(pSwapchainImageCount);
_swapchain* s = swapchain;
if(!pSwapchainImages)
{
*pSwapchainImageCount = s->numImages;
return VK_SUCCESS;
}
int arraySize = *pSwapchainImageCount;
int elementsWritten = min(s->numImages, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pSwapchainImages[c] = &s->images[c];
}
*pSwapchainImageCount = elementsWritten;
if(elementsWritten < s->numImages)
{
return VK_INCOMPLETE;
}
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#commandbuffers-pools
* Command pools are opaque objects that command buffer memory is allocated from, and which allow the implementation to amortize the
* cost of resource creation across multiple command buffers. Command pools are externally synchronized, meaning that a command pool must
* not be used concurrently in multiple threads. That includes use via recording commands on any command buffers allocated from the pool,
* as well as operations that allocate, free, and reset command buffers or the pool itself.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool(
VkDevice device,
const VkCommandPoolCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkCommandPool* pCommandPool)
{
assert(device);
assert(pCreateInfo);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//VK_COMMAND_POOL_CREATE_TRANSIENT_BIT
//specifies that command buffers allocated from the pool will be short-lived, meaning that they will be reset or freed in a relatively short timeframe.
//This flag may be used by the implementation to control memory allocation behavior within the pool.
//--> definitely use pool allocator
//VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT
//allows any command buffer allocated from a pool to be individually reset to the initial state; either by calling vkResetCommandBuffer, or via the implicit reset when calling vkBeginCommandBuffer.
//If this flag is not set on a pool, then vkResetCommandBuffer must not be called for any command buffer allocated from that pool.
//TODO pool family ignored for now
_commandPool* cp = malloc(sizeof(_commandPool));
if(!cp)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
cp->queueFamilyIndex = pCreateInfo->queueFamilyIndex;
//initial number of command buffers to hold
int numCommandBufs = 100;
int controlListSize = ARM_PAGE_SIZE * 100;
//if(pCreateInfo->flags & VK_COMMAND_POOL_CREATE_TRANSIENT_BIT)
{
//use pool allocator
void* pamem = malloc(numCommandBufs * sizeof(_commandBuffer));
if(!pamem)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
cp->pa = createPoolAllocator(pamem, sizeof(_commandBuffer), numCommandBufs * sizeof(_commandBuffer));
void* cpamem = malloc(controlListSize);
if(!cpamem)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
cp->cpa = createConsecutivePoolAllocator(cpamem, ARM_PAGE_SIZE, controlListSize);
}
*pCommandPool = (VkCommandPool)cp;
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#commandbuffer-allocation
* vkAllocateCommandBuffers can be used to create multiple command buffers. If the creation of any of those command buffers fails,
* the implementation must destroy all successfully created command buffer objects from this command, set all entries of the pCommandBuffers array to NULL and return the error.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers(
VkDevice device,
const VkCommandBufferAllocateInfo* pAllocateInfo,
VkCommandBuffer* pCommandBuffers)
{
assert(device);
assert(pAllocateInfo);
assert(pCommandBuffers);
VkResult res = VK_SUCCESS;
_commandPool* cp = (_commandPool*)pAllocateInfo->commandPool;
//per spec, if any allocation fails every entry of pCommandBuffers must end up NULL, so clear them up front
memset(pCommandBuffers, 0, sizeof(VkCommandBuffer) * pAllocateInfo->commandBufferCount);
//if(cp->usePoolAllocator)
{
for(int c = 0; c < pAllocateInfo->commandBufferCount; ++c)
{
pCommandBuffers[c] = poolAllocate(&cp->pa);
if(!pCommandBuffers[c])
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
pCommandBuffers[c]->shaderRecCount = 0;
pCommandBuffers[c]->usageFlags = 0;
pCommandBuffers[c]->state = CMDBUF_STATE_INITIAL;
pCommandBuffers[c]->cp = cp;
clInit(&pCommandBuffers[c]->binCl, consecutivePoolAllocate(&cp->cpa, 1));
clInit(&pCommandBuffers[c]->handlesCl, consecutivePoolAllocate(&cp->cpa, 1));
clInit(&pCommandBuffers[c]->shaderRecCl, consecutivePoolAllocate(&cp->cpa, 1));
clInit(&pCommandBuffers[c]->uniformsCl, consecutivePoolAllocate(&cp->cpa, 1));
if(!pCommandBuffers[c]->binCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->handlesCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->shaderRecCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->uniformsCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
}
}
if(res != VK_SUCCESS)
{
//if(cp->usePoolAllocator)
{
for(int c = 0; c < pAllocateInfo->commandBufferCount; ++c)
{
if(!pCommandBuffers[c])
{
continue; //this one was never allocated
}
//free whichever control list blocks were successfully allocated, each with its own block count
if(pCommandBuffers[c]->binCl.buffer) consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->binCl.buffer, pCommandBuffers[c]->binCl.numBlocks);
if(pCommandBuffers[c]->handlesCl.buffer) consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->handlesCl.buffer, pCommandBuffers[c]->handlesCl.numBlocks);
if(pCommandBuffers[c]->shaderRecCl.buffer) consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->shaderRecCl.buffer, pCommandBuffers[c]->shaderRecCl.numBlocks);
if(pCommandBuffers[c]->uniformsCl.buffer) consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->uniformsCl.buffer, pCommandBuffers[c]->uniformsCl.numBlocks);
poolFree(&cp->pa, pCommandBuffers[c]);
pCommandBuffers[c] = 0;
}
}
}
return res;
}
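//Example (usage sketch, not part of the driver): creating a command pool for queue family 0 and
//allocating a single primary command buffer from it, matching the two functions above. Names are
//illustrative only and error handling is omitted.
#if 0
VkCommandBuffer exampleAllocateCommandBuffer(VkDevice device)
{
	VkCommandPoolCreateInfo poolInfo = {0};
	poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
	poolInfo.queueFamilyIndex = 0;

	VkCommandPool pool;
	vkCreateCommandPool(device, &poolInfo, 0, &pool);

	VkCommandBufferAllocateInfo allocInfo = {0};
	allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
	allocInfo.commandPool = pool;
	allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
	allocInfo.commandBufferCount = 1;

	VkCommandBuffer commandBuffer;
	vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer);
	return commandBuffer;
}
#endif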
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkBeginCommandBuffer
*/
VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer(
VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo* pBeginInfo)
{
assert(commandBuffer);
assert(pBeginInfo);
//TODO
//VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
//specifies that each recording of the command buffer will only be submitted once, and the command buffer will be reset and recorded again between each submission.
//VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT
//specifies that a secondary command buffer is considered to be entirely inside a render pass. If this is a primary command buffer, then this bit is ignored
//VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT
//specifies that a command buffer can be resubmitted to a queue while it is in the pending state, and recorded into multiple primary command buffers
//When a command buffer begins recording, all state in that command buffer is undefined
struct drm_vc4_submit_cl submitCl =
{
.color_read.hindex = ~0,
.zs_read.hindex = ~0,
.color_write.hindex = ~0,
.msaa_color_write.hindex = ~0,
.zs_write.hindex = ~0,
.msaa_zs_write.hindex = ~0,
};
commandBuffer->usageFlags = pBeginInfo->flags;
commandBuffer->shaderRecCount = 0;
commandBuffer->state = CMDBUF_STATE_RECORDING;
commandBuffer->submitCl = submitCl;
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdPipelineBarrier
* vkCmdPipelineBarrier is a synchronization command that inserts a dependency between commands submitted to the same queue, or between commands in the same subpass.
* When vkCmdPipelineBarrier is submitted to a queue, it defines a memory dependency between commands that were submitted before it, and those submitted after it.
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the first synchronization scope includes all commands that occur earlier in submission order.
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the first synchronization scope includes only commands that occur earlier in submission order within the same subpass.
* In either case, the first synchronization scope is limited to operations on the pipeline stages determined by the source stage mask specified by srcStageMask.
*
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the second synchronization scope includes all commands that occur later in submission order.
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the second synchronization scope includes only commands that occur later in submission order within the same subpass.
* In either case, the second synchronization scope is limited to operations on the pipeline stages determined by the destination stage mask specified by dstStageMask.
*
* The first access scope is limited to access in the pipeline stages determined by the source stage mask specified by srcStageMask.
* Within that, the first access scope only includes the first access scopes defined by elements of the pMemoryBarriers,
* pBufferMemoryBarriers and pImageMemoryBarriers arrays, which each define a set of memory barriers. If no memory barriers are specified,
* then the first access scope includes no accesses.
*
* The second access scope is limited to access in the pipeline stages determined by the destination stage mask specified by dstStageMask.
* Within that, the second access scope only includes the second access scopes defined by elements of the pMemoryBarriers, pBufferMemoryBarriers and pImageMemoryBarriers arrays,
* which each define a set of memory barriers. If no memory barriers are specified, then the second access scope includes no accesses.
*
* If dependencyFlags includes VK_DEPENDENCY_BY_REGION_BIT, then any dependency between framebuffer-space pipeline stages is framebuffer-local - otherwise it is framebuffer-global.
*/
VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier(
VkCommandBuffer commandBuffer,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
VkDependencyFlags dependencyFlags,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
{
assert(commandBuffer);
//TODO pipeline stage flags
//VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
//VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
//VK_PIPELINE_STAGE_VERTEX_INPUT_BIT
//VK_PIPELINE_STAGE_VERTEX_SHADER_BIT
//VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
//VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT
//VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT
//VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
//VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
//VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
//VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
//VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
//VK_PIPELINE_STAGE_TRANSFER_BIT
//VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
//VK_PIPELINE_STAGE_HOST_BIT
//VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT
//VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
//TODO dependency flags
//VK_DEPENDENCY_BY_REGION_BIT,
//VK_DEPENDENCY_DEVICE_GROUP_BIT,
//VK_DEPENDENCY_VIEW_LOCAL_BIT
//TODO access flags
//VK_ACCESS_INDIRECT_COMMAND_READ_BIT
//VK_ACCESS_INDEX_READ_BIT
//VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
//VK_ACCESS_UNIFORM_READ_BIT
//VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
//VK_ACCESS_SHADER_READ_BIT
//VK_ACCESS_SHADER_WRITE_BIT
//VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
//VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
//VK_ACCESS_TRANSFER_READ_BIT
//VK_ACCESS_TRANSFER_WRITE_BIT
//VK_ACCESS_HOST_READ_BIT
//VK_ACCESS_HOST_WRITE_BIT
//VK_ACCESS_MEMORY_READ_BIT
//VK_ACCESS_MEMORY_WRITE_BIT
//VK_ACCESS_COMMAND_PROCESS_READ_BIT_NVX
//VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX
//TODO Layout transition flags
//VK_IMAGE_LAYOUT_UNDEFINED
//VK_IMAGE_LAYOUT_GENERAL
//VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
//VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
//VK_IMAGE_LAYOUT_PREINITIALIZED
//VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
//VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR
for(int c = 0; c < memoryBarrierCount; ++c)
{
//TODO
}
for(int c = 0; c < bufferMemoryBarrierCount; ++c)
{
//TODO
}
for(int c = 0; c < imageMemoryBarrierCount; ++c)
{
_image* i = pImageMemoryBarriers[c].image;
assert(i->layout == pImageMemoryBarriers[c].oldLayout || i->layout == VK_IMAGE_LAYOUT_UNDEFINED);
if(srcStageMask & VK_PIPELINE_STAGE_TRANSFER_BIT &&
pImageMemoryBarriers[c].srcAccessMask & VK_ACCESS_TRANSFER_WRITE_BIT &&
i->needToClear)
{
//insert CRs to clear the image
assert(i->layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length);
clInsertTileBinningModeConfiguration(&commandBuffer->binCl,
0, 0, 0, 0,
getFormatBpp(i->format) == 64, //64 bit color mode
i->samples > 1, //msaa
i->width, i->height, 0, 0, 0);
//START_TILE_BINNING resets the statechange counters in the hardware,
//which are what is used when a primitive is binned to a tile to
//figure out what new state packets need to be written to that tile's
//command list.
clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length);
clInsertStartTileBinning(&commandBuffer->binCl);
//Reset the current compressed primitives format. This gets modified
//by VC4_PACKET_GL_INDEXED_PRIMITIVE and
//VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
//of every tile.
clFit(commandBuffer, &commandBuffer->binCl, V3D21_PRIMITIVE_LIST_FORMAT_length);
clInsertPrimitiveListFormat(&commandBuffer->binCl,
1, //16 bit
2); //tris
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, i->handle);
commandBuffer->submitCl.color_write.hindex = idx;
commandBuffer->submitCl.color_write.offset = 0;
commandBuffer->submitCl.color_write.flags = 0;
//TODO format
commandBuffer->submitCl.color_write.bits =
VC4_SET_FIELD(VC4_RENDER_CONFIG_FORMAT_RGBA8888, VC4_RENDER_CONFIG_FORMAT) |
VC4_SET_FIELD(i->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);
commandBuffer->submitCl.clear_color[0] = i->clearColor[0];
commandBuffer->submitCl.clear_color[1] = i->clearColor[1];
//TODO ranges
commandBuffer->submitCl.min_x_tile = 0;
commandBuffer->submitCl.min_y_tile = 0;
uint32_t tileSizeW = 64;
uint32_t tileSizeH = 64;
if(i->samples > 1)
{
tileSizeW >>= 1;
tileSizeH >>= 1;
}
if(getFormatBpp(i->format) == 64)
{
tileSizeH >>= 1;
}
uint32_t widthInTiles = divRoundUp(i->width, tileSizeW);
uint32_t heightInTiles = divRoundUp(i->height, tileSizeH);
commandBuffer->submitCl.max_x_tile = widthInTiles - 1;
commandBuffer->submitCl.max_y_tile = heightInTiles - 1;
commandBuffer->submitCl.width = i->width;
commandBuffer->submitCl.height = i->height;
commandBuffer->submitCl.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
commandBuffer->submitCl.clear_z = 0; //TODO
commandBuffer->submitCl.clear_s = 0;
}
//transition to new layout
i->layout = pImageMemoryBarriers[c].newLayout;
}
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdClearColorImage
* Color and depth/stencil images can be cleared outside a render pass instance using vkCmdClearColorImage or vkCmdClearDepthStencilImage, respectively.
* These commands are only allowed outside of a render pass instance.
*/
VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage(
VkCommandBuffer commandBuffer,
VkImage image,
VkImageLayout imageLayout,
const VkClearColorValue* pColor,
uint32_t rangeCount,
const VkImageSubresourceRange* pRanges)
{
assert(commandBuffer);
assert(image);
assert(pColor);
//TODO this should only flag an image for clearing. This can only be called outside a renderpass
//actual clearing would only happen:
// -if image is rendered to (insert clear before first draw call)
// -if the image is bound for sampling (submit a CL with a clear)
// -if a command buffer is submitted without any rendering (insert clear)
// -etc.
//we shouldn't clear an image if no one uses it
//TODO ranges support
assert(imageLayout == VK_IMAGE_LAYOUT_GENERAL ||
imageLayout == VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR ||
imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
assert(commandBuffer->state == CMDBUF_STATE_RECORDING);
assert(_queueFamilyProperties[commandBuffer->cp->queueFamilyIndex].queueFlags & VK_QUEUE_GRAPHICS_BIT || _queueFamilyProperties[commandBuffer->cp->queueFamilyIndex].queueFlags & VK_QUEUE_COMPUTE_BIT);
_image* i = image;
assert(i->usageBits & VK_IMAGE_USAGE_TRANSFER_DST_BIT);
//TODO externally sync cmdbuf, cmdpool
i->needToClear = 1;
i->clearColor[0] = i->clearColor[1] = packVec4IntoABGR8(pColor->float32);
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEndCommandBuffer
* If there was an error during recording, the application will be notified by an unsuccessful return code returned by vkEndCommandBuffer.
* If the application wishes to further use the command buffer, the command buffer must be reset. The command buffer must have been in the recording state,
* and is moved to the executable state.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkEndCommandBuffer(
VkCommandBuffer commandBuffer)
{
assert(commandBuffer);
//Increment the semaphore indicating that binning is done and
//unblocking the render thread. Note that this doesn't act
//until the FLUSH completes.
//The FLUSH caps all of our bin lists with a
//VC4_PACKET_RETURN.
clFit(commandBuffer, &commandBuffer->binCl, V3D21_INCREMENT_SEMAPHORE_length);
clInsertIncrementSemaphore(&commandBuffer->binCl);
clFit(commandBuffer, &commandBuffer->binCl, V3D21_FLUSH_length);
clInsertFlush(&commandBuffer->binCl);
commandBuffer->state = CMDBUF_STATE_EXECUTABLE;
return VK_SUCCESS;
}
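//Example (usage sketch, not part of the driver): recording the clear path the functions above
//implement: transition a swapchain image to TRANSFER_DST, flag it for clearing, then transition it
//towards presentation, which is the point where the implementation emits the clear into the binning
//control list. Assumes the image starts in VK_IMAGE_LAYOUT_UNDEFINED; names are illustrative only.
#if 0
void exampleRecordClear(VkCommandBuffer commandBuffer, VkImage swapchainImage)
{
	VkCommandBufferBeginInfo beginInfo = {0};
	beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
	vkBeginCommandBuffer(commandBuffer, &beginInfo);

	VkImageSubresourceRange range = {0};
	range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	range.levelCount = 1;
	range.layerCount = 1;

	VkImageMemoryBarrier barrier = {0};
	barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	barrier.image = swapchainImage;
	barrier.subresourceRange = range;

	//undefined -> transfer destination
	barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
	barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
	barrier.srcAccessMask = 0;
	barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
		0, 0, 0, 0, 0, 1, &barrier);

	//flag the image for clearing
	VkClearColorValue clearColor = { .float32 = { 1.0f, 0.0f, 0.0f, 1.0f } };
	vkCmdClearColorImage(commandBuffer, swapchainImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearColor, 1, &range);

	//transfer destination -> presentation; this is the barrier that actually inserts the clear
	barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
	barrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
	barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
	vkCmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
		0, 0, 0, 0, 0, 1, &barrier);

	vkEndCommandBuffer(commandBuffer);
}
#endif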
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkAcquireNextImageKHR
*/
VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR(
VkDevice device,
VkSwapchainKHR swapchain,
uint64_t timeout,
VkSemaphore semaphore,
VkFence fence,
uint32_t* pImageIndex)
{
assert(device);
assert(swapchain);
assert(semaphore != VK_NULL_HANDLE || fence != VK_NULL_HANDLE);
sem_t* s = semaphore;
//TODO we need to keep track of currently acquired images?
//TODO wait timeout?
*pImageIndex = ((_swapchain*)swapchain)->backbufferIdx; //return back buffer index
//signal semaphore
int semVal; sem_getvalue(s, &semVal); assert(semVal <= 0); //make sure semaphore is unsignalled
sem_post(s);
//TODO signal fence
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkQueueSubmit
* vkQueueSubmit is a queue submission command, with each batch defined by an element of pSubmits as an instance of the VkSubmitInfo structure.
* Batches begin execution in the order they appear in pSubmits, but may complete out of order.
* Fence and semaphore operations submitted with vkQueueSubmit have additional ordering constraints compared to other submission commands,
* with dependencies involving previous and subsequent queue operations. Information about these additional constraints can be found in the semaphore and
* fence sections of the synchronization chapter.
* Details on the interaction of pWaitDstStageMask with synchronization are described in the semaphore wait operation section of the synchronization chapter.
* The order that batches appear in pSubmits is used to determine submission order, and thus all the implicit ordering guarantees that respect it.
* Other than these implicit ordering guarantees and any explicit synchronization primitives, these batches may overlap or otherwise execute out of order.
* If any command buffer submitted to this queue is in the executable state, it is moved to the pending state. Once execution of all submissions of a command buffer complete,
* it moves from the pending state, back to the executable state. If a command buffer was recorded with the VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT flag,
* it instead moves back to the invalid state.
* If vkQueueSubmit fails, it may return VK_ERROR_OUT_OF_HOST_MEMORY or VK_ERROR_OUT_OF_DEVICE_MEMORY.
* If it does, the implementation must ensure that the state and contents of any resources or synchronization primitives referenced by the submitted command buffers and any semaphores
* referenced by pSubmits is unaffected by the call or its failure. If vkQueueSubmit fails in such a way that the implementation is unable to make that guarantee,
* the implementation must return VK_ERROR_DEVICE_LOST. See Lost Device.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
VkQueue queue,
uint32_t submitCount,
const VkSubmitInfo* pSubmits,
VkFence fence)
{
assert(queue);
//TODO: for now only the first VkSubmitInfo in pSubmits is handled, submitCount is ignored
for(int c = 0; c < pSubmits->waitSemaphoreCount; ++c)
{
sem_wait((sem_t*)pSubmits->pWaitSemaphores[c]);
}
//TODO: deal with pSubmits->pWaitDstStageMask
//TODO wait for fence??
for(int c = 0; c < pSubmits->commandBufferCount; ++c)
{
if(pSubmits->pCommandBuffers[c]->state == CMDBUF_STATE_EXECUTABLE)
{
pSubmits->pCommandBuffers[c]->state = CMDBUF_STATE_PENDING;
}
}
for(int c = 0; c < pSubmits->commandBufferCount; ++c)
{
VkCommandBuffer cmdbuf = pSubmits->pCommandBuffers[c];
cmdbuf->submitCl.bo_handles = cmdbuf->handlesCl.buffer;
cmdbuf->submitCl.bo_handle_count = clSize(&cmdbuf->handlesCl) / 4;
cmdbuf->submitCl.bin_cl = cmdbuf->binCl.buffer;
cmdbuf->submitCl.bin_cl_size = clSize(&cmdbuf->binCl);
cmdbuf->submitCl.shader_rec = cmdbuf->shaderRecCl.buffer;
cmdbuf->submitCl.shader_rec_size = clSize(&cmdbuf->shaderRecCl);
cmdbuf->submitCl.shader_rec_count = cmdbuf->shaderRecCount;
cmdbuf->submitCl.uniforms = cmdbuf->uniformsCl.buffer;
cmdbuf->submitCl.uniforms_size = clSize(&cmdbuf->uniformsCl);
printf("BCL:\n");
clDump(cmdbuf->submitCl.bin_cl, cmdbuf->submitCl.bin_cl_size);
printf("BO handles: ");
for(int d = 0; d < cmdbuf->submitCl.bo_handle_count; ++d)
{
printf("%u ", *((uint32_t*)(cmdbuf->submitCl.bo_handles)+d));
}
printf("\nwidth height: %u, %u\n", cmdbuf->submitCl.width, cmdbuf->submitCl.height);
printf("tile min/max: %u,%u %u,%u\n", cmdbuf->submitCl.min_x_tile, cmdbuf->submitCl.min_y_tile, cmdbuf->submitCl.max_x_tile, cmdbuf->submitCl.max_y_tile);
printf("color read surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.color_read.hindex, cmdbuf->submitCl.color_read.offset, cmdbuf->submitCl.color_read.bits, cmdbuf->submitCl.color_read.flags);
printf("color write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.color_write.hindex, cmdbuf->submitCl.color_write.offset, cmdbuf->submitCl.color_write.bits, cmdbuf->submitCl.color_write.flags);
printf("zs read surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.zs_read.hindex, cmdbuf->submitCl.zs_read.offset, cmdbuf->submitCl.zs_read.bits, cmdbuf->submitCl.zs_read.flags);
printf("zs write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.zs_write.hindex, cmdbuf->submitCl.zs_write.offset, cmdbuf->submitCl.zs_write.bits, cmdbuf->submitCl.zs_write.flags);
printf("msaa color write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.msaa_color_write.hindex, cmdbuf->submitCl.msaa_color_write.offset, cmdbuf->submitCl.msaa_color_write.bits, cmdbuf->submitCl.msaa_color_write.flags);
printf("msaa zs write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.msaa_zs_write.hindex, cmdbuf->submitCl.msaa_zs_write.offset, cmdbuf->submitCl.msaa_zs_write.bits, cmdbuf->submitCl.msaa_zs_write.flags);
printf("clear color packed rgba %u %u\n", cmdbuf->submitCl.clear_color[0], cmdbuf->submitCl.clear_color[1]);
printf("clear z %u\n", cmdbuf->submitCl.clear_z);
printf("clear s %u\n", cmdbuf->submitCl.clear_s);
printf("flags %u\n", cmdbuf->submitCl.flags);
//submit ioctl
static uint64_t lastFinishedSeqno = 0;
vc4_cl_submit(controlFd, &cmdbuf->submitCl, &queue->lastEmitSeqno, &lastFinishedSeqno);
}
for(int c = 0; c < pSubmits->commandBufferCount; ++c)
{
if(pSubmits->pCommandBuffers[c]->state == CMDBUF_STATE_PENDING)
{
if(pSubmits->pCommandBuffers[c]->usageFlags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)
{
pSubmits->pCommandBuffers[c]->state = CMDBUF_STATE_INVALID;
}
else
{
pSubmits->pCommandBuffers[c]->state = CMDBUF_STATE_EXECUTABLE;
}
}
}
for(int c = 0; c < pSubmits->signalSemaphoreCount; ++c)
{
sem_post((sem_t*)pSubmits->pSignalSemaphores[c]);
}
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkQueuePresentKHR
* Any writes to memory backing the images referenced by the pImageIndices and pSwapchains members of pPresentInfo,
* that are available before vkQueuePresentKHR is executed, are automatically made visible to the read access performed by the presentation engine.
* This automatic visibility operation for an image happens-after the semaphore signal operation, and happens-before the presentation engine accesses the image.
* Queueing an image for presentation defines a set of queue operations, including waiting on the semaphores and submitting a presentation request to the presentation engine.
* However, the scope of this set of queue operations does not include the actual processing of the image by the presentation engine.
* If vkQueuePresentKHR fails to enqueue the corresponding set of queue operations, it may return VK_ERROR_OUT_OF_HOST_MEMORY or VK_ERROR_OUT_OF_DEVICE_MEMORY.
* If it does, the implementation must ensure that the state and contents of any resources or synchronization primitives referenced is unaffected by the call or its failure.
* If vkQueuePresentKHR fails in such a way that the implementation is unable to make that guarantee, the implementation must return VK_ERROR_DEVICE_LOST.
* However, if the presentation request is rejected by the presentation engine with an error VK_ERROR_OUT_OF_DATE_KHR or VK_ERROR_SURFACE_LOST_KHR,
* the set of queue operations are still considered to be enqueued and thus any semaphore to be waited on gets unsignaled when the corresponding queue operation is complete.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR(
VkQueue queue,
const VkPresentInfoKHR* pPresentInfo)
{
assert(queue);
assert(pPresentInfo);
//wait for semaphore in present info set by submit ioctl to make sure cls are flushed
for(int c = 0; c < pPresentInfo->waitSemaphoreCount; ++c)
{
sem_wait((sem_t*)pPresentInfo->pWaitSemaphores[c]);
}
for(int c = 0; c < pPresentInfo->swapchainCount; ++c)
{
_swapchain* s = pPresentInfo->pSwapchains[c];
modeset_present_buffer(controlFd, (modeset_dev*)s->surface, &s->images[s->backbufferIdx]);
s->backbufferIdx = (s->backbufferIdx + 1) % s->numImages;
}
return VK_SUCCESS;
}
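//Example (usage sketch, not part of the driver): a typical per-frame sequence against the
//implementations above: acquire an image, submit the recorded command buffer, then present.
//The semaphores are assumed to come from vkCreateSemaphore; names are illustrative only.
#if 0
void exampleFrame(VkDevice device, VkQueue queue, VkSwapchainKHR swapchain,
	VkCommandBuffer commandBuffer, VkSemaphore imageAvailable, VkSemaphore renderFinished)
{
	uint32_t imageIndex;
	vkAcquireNextImageKHR(device, swapchain, UINT64_MAX, imageAvailable, VK_NULL_HANDLE, &imageIndex);

	VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
	VkSubmitInfo submitInfo = {0};
	submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
	submitInfo.waitSemaphoreCount = 1;
	submitInfo.pWaitSemaphores = &imageAvailable;
	submitInfo.pWaitDstStageMask = &waitStage;
	submitInfo.commandBufferCount = 1;
	submitInfo.pCommandBuffers = &commandBuffer;
	submitInfo.signalSemaphoreCount = 1;
	submitInfo.pSignalSemaphores = &renderFinished;
	vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE);

	VkPresentInfoKHR presentInfo = {0};
	presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
	presentInfo.waitSemaphoreCount = 1;
	presentInfo.pWaitSemaphores = &renderFinished;
	presentInfo.swapchainCount = 1;
	presentInfo.pSwapchains = &swapchain;
	presentInfo.pImageIndices = &imageIndex;
	vkQueuePresentKHR(queue, &presentInfo);
}
#endif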
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDeviceWaitIdle
* vkDeviceWaitIdle is equivalent to calling vkQueueWaitIdle for all queues owned by device.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle(
VkDevice device)
{
assert(device);
for(int c = 0; c < numQueueFamilies; ++c)
{
for(int d = 0; d < device->numQueues[c]; ++d)
{
uint64_t lastFinishedSeqno;
vc4_seqno_wait(controlFd, &lastFinishedSeqno, device->queues[c][d].lastEmitSeqno, WAIT_TIMEOUT_INFINITE);
}
}
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkFreeCommandBuffers
* Any primary command buffer that is in the recording or executable state and has any element of pCommandBuffers recorded into it, becomes invalid.
*/
VKAPI_ATTR void VKAPI_CALL vkFreeCommandBuffers(
VkDevice device,
VkCommandPool commandPool,
uint32_t commandBufferCount,
const VkCommandBuffer* pCommandBuffers)
{
assert(device);
assert(commandPool);
assert(pCommandBuffers);
_commandPool* cp = (_commandPool*)commandPool;
for(int c = 0; c < commandBufferCount; ++c)
{
//if(cp->usePoolAllocator)
{
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->binCl.buffer, pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->handlesCl.buffer, pCommandBuffers[c]->handlesCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->shaderRecCl.buffer, pCommandBuffers[c]->shaderRecCl.numBlocks);
consecutivePoolFree(&cp->cpa, pCommandBuffers[c]->uniformsCl.buffer, pCommandBuffers[c]->uniformsCl.numBlocks);
poolFree(&cp->pa, pCommandBuffers[c]);
}
}
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyCommandPool
* When a pool is destroyed, all command buffers allocated from the pool are freed.
* Any primary command buffer allocated from another VkCommandPool that is in the recording or executable state and has a secondary command buffer
* allocated from commandPool recorded into it, becomes invalid.
*/
VKAPI_ATTR void VKAPI_CALL vkDestroyCommandPool(
VkDevice device,
VkCommandPool commandPool,
const VkAllocationCallbacks* pAllocator)
{
assert(device);
assert(commandPool);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
_commandPool* cp = (_commandPool*)commandPool;
//if(cp->usePoolAllocator)
{
free(cp->pa.buf);
free(cp->cpa.buf);
destroyPoolAllocator(&cp->pa);
destroyConsecutivePoolAllocator(&cp->cpa);
}
free(cp);
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySemaphore
*/
VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore(
VkDevice device,
VkSemaphore semaphore,
const VkAllocationCallbacks* pAllocator)
{
assert(device);
assert(semaphore);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
sem_destroy((sem_t*)semaphore);
free((sem_t*)semaphore); //the sem_t itself was malloc'd in vkCreateSemaphore
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySwapchainKHR
*/
VKAPI_ATTR void VKAPI_CALL vkDestroySwapchainKHR(
VkDevice device,
VkSwapchainKHR swapchain,
const VkAllocationCallbacks* pAllocator)
{
assert(device);
assert(swapchain);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//TODO flush all ops
_swapchain* s = swapchain;
for(int c = 0; c < s->numImages; ++c)
{
vc4_bo_free(controlFd, s->images[c].handle, 0, s->images[c].size);
modeset_destroy_fb(controlFd, &s->images[c]);
}
free(s->images);
free(s);
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyDevice
* To ensure that no work is active on the device, vkDeviceWaitIdle can be used to gate the destruction of the device.
* Prior to destroying a device, an application is responsible for destroying/freeing any Vulkan objects that were created using that device as the
* first parameter of the corresponding vkCreate* or vkAllocate* command
*/
VKAPI_ATTR void VKAPI_CALL vkDestroyDevice(
VkDevice device,
const VkAllocationCallbacks* pAllocator)
{
assert(device);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//TODO
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyInstance
*
*/
VKAPI_ATTR void VKAPI_CALL vkDestroyInstance(
VkInstance instance,
const VkAllocationCallbacks* pAllocator)
{
assert(instance);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//TODO
closeIoctl();
}

124
driver/instance.c Normal file
View File

@ -0,0 +1,124 @@
#include "common.h"
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEnumerateInstanceExtensionProperties
* When pLayerName parameter is NULL, only extensions provided by the Vulkan implementation or by implicitly enabled layers are returned. When pLayerName is the name of a layer,
* the instance extensions provided by that layer are returned.
* If pProperties is NULL, then the number of extensions properties available is returned in pPropertyCount. Otherwise, pPropertyCount must point to a variable set by the user
* to the number of elements in the pProperties array, and on return the variable is overwritten with the number of structures actually written to pProperties.
* If pPropertyCount is less than the number of extension properties available, at most pPropertyCount structures will be written. If pPropertyCount is smaller than the number of extensions available,
* VK_INCOMPLETE will be returned instead of VK_SUCCESS, to indicate that not all the available properties were returned.
* Because the list of available layers may change externally between calls to vkEnumerateInstanceExtensionProperties,
* two calls may retrieve different results if a pLayerName is available in one call but not in another. The extensions supported by a layer may also change between two calls,
* e.g. if the layer implementation is replaced by a different version between those calls.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceExtensionProperties(
const char* pLayerName,
uint32_t* pPropertyCount,
VkExtensionProperties* pProperties)
{
assert(!pLayerName); //TODO layers ignored for now
assert(pPropertyCount);
if(!pProperties)
{
*pPropertyCount = numInstanceExtensions;
return VK_SUCCESS; //this is only the count query, so per spec it succeeds
}
int arraySize = *pPropertyCount;
int elementsWritten = min(numInstanceExtensions, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pProperties[c] = instanceExtensions[c];
}
*pPropertyCount = elementsWritten;
if(elementsWritten < numInstanceExtensions)
{
return VK_INCOMPLETE; //not all available properties were returned
}
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateInstance
* There is no global state in Vulkan and all per-application state is stored in a VkInstance object. Creating a VkInstance object initializes the Vulkan library
* vkCreateInstance verifies that the requested layers exist. If not, vkCreateInstance will return VK_ERROR_LAYER_NOT_PRESENT. Next vkCreateInstance verifies that
* the requested extensions are supported (e.g. in the implementation or in any enabled instance layer) and if any requested extension is not supported,
* vkCreateInstance must return VK_ERROR_EXTENSION_NOT_PRESENT. After verifying and enabling the instance layers and extensions the VkInstance object is
* created and returned to the application.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance(
const VkInstanceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkInstance* pInstance)
{
assert(pInstance);
assert(pCreateInfo);
*pInstance = malloc(sizeof(_instance));
if(!*pInstance)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
(*pInstance)->numEnabledExtensions = 0;
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//TODO: possibly we need to load layers here
//and store them in pInstance
assert(pCreateInfo->enabledLayerCount == 0);
if(pCreateInfo->enabledExtensionCount)
{
assert(pCreateInfo->ppEnabledExtensionNames);
}
for(int c = 0; c < pCreateInfo->enabledExtensionCount; ++c)
{
int findres = findInstanceExtension(pCreateInfo->ppEnabledExtensionNames[c]);
if(findres > -1)
{
(*pInstance)->enabledExtensions[(*pInstance)->numEnabledExtensions] = findres;
(*pInstance)->numEnabledExtensions++;
}
else
{
return VK_ERROR_EXTENSION_NOT_PRESENT;
}
}
//TODO ignored for now
//pCreateInfo->pApplicationInfo
int ret = openIoctl(); assert(!ret);
(*pInstance)->chipVersion = vc4_get_chip_info(controlFd);
(*pInstance)->hasTiling = vc4_test_tiling(controlFd);
(*pInstance)->hasControlFlow = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_BRANCHES);
(*pInstance)->hasEtc1 = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_ETC1);
(*pInstance)->hasThreadedFs = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
(*pInstance)->hasMadvise = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_MADVISE);
return VK_SUCCESS;
}
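//Example (usage sketch, not part of the driver): minimal application-side instance creation
//against the implementation above. No layers or instance extensions are requested here; which
//extensions could be enabled depends on what instanceExtensions actually advertises. Names are
//illustrative only.
#if 0
VkInstance exampleCreateInstance(void)
{
	VkApplicationInfo appInfo = {0};
	appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
	appInfo.pApplicationName = "rpi-vk-test";
	appInfo.apiVersion = VK_MAKE_VERSION(1, 1, 0);

	VkInstanceCreateInfo createInfo = {0};
	createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
	createInfo.pApplicationInfo = &appInfo;

	VkInstance instance;
	vkCreateInstance(&createInfo, 0, &instance);
	return instance;
}
#endif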
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyInstance
*
*/
VKAPI_ATTR void VKAPI_CALL vkDestroyInstance(
VkInstance instance,
const VkAllocationCallbacks* pAllocator)
{
assert(instance);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//TODO
closeIoctl();
}

273
driver/sync.c Normal file
View File

@ -0,0 +1,273 @@
#include "common.h"
#include "kernel/vc4_packet.h"
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSemaphore
* Semaphores are a synchronization primitive that can be used to insert a dependency between batches submitted to queues.
* Semaphores have two states - signaled and unsignaled. The state of a semaphore can be signaled after execution of a batch of commands is completed.
* A batch can wait for a semaphore to become signaled before it begins execution, and the semaphore is also unsignaled before the batch begins execution.
* As with most objects in Vulkan, semaphores are an interface to internal data which is typically opaque to applications.
* This internal data is referred to as a semaphore's payload. However, in order to enable communication with agents outside of the current device,
* it is necessary to be able to export that payload to a commonly understood format, and subsequently import from that format as well.
* The internal data of a semaphore may include a reference to any resources and pending work associated with signal or unsignal operations performed on that semaphore object.
* Mechanisms to import and export that internal data to and from semaphores are provided below.
* These mechanisms indirectly enable applications to share semaphore state between two or more semaphores and other synchronization primitives across process and API boundaries.
* When created, the semaphore is in the unsignaled state.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore(
VkDevice device,
const VkSemaphoreCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSemaphore* pSemaphore)
{
assert(device);
assert(pSemaphore);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//we'll probably just use an IOCTL to wait for a GPU sequence number to complete.
sem_t* s = malloc(sizeof(sem_t));
if(!s)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
sem_init(s, 0, 0); //create semaphore unsignalled, shared between threads
*pSemaphore = (VkSemaphore)s;
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdPipelineBarrier
* vkCmdPipelineBarrier is a synchronization command that inserts a dependency between commands submitted to the same queue, or between commands in the same subpass.
* When vkCmdPipelineBarrier is submitted to a queue, it defines a memory dependency between commands that were submitted before it, and those submitted after it.
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the first synchronization scope includes all commands that occur earlier in submission order.
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the first synchronization scope includes only commands that occur earlier in submission order within the same subpass.
* In either case, the first synchronization scope is limited to operations on the pipeline stages determined by the source stage mask specified by srcStageMask.
*
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the second synchronization scope includes all commands that occur later in submission order.
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the second synchronization scope includes only commands that occur later in submission order within the same subpass.
* In either case, the second synchronization scope is limited to operations on the pipeline stages determined by the destination stage mask specified by dstStageMask.
*
* The first access scope is limited to access in the pipeline stages determined by the source stage mask specified by srcStageMask.
* Within that, the first access scope only includes the first access scopes defined by elements of the pMemoryBarriers,
* pBufferMemoryBarriers and pImageMemoryBarriers arrays, which each define a set of memory barriers. If no memory barriers are specified,
* then the first access scope includes no accesses.
*
* The second access scope is limited to access in the pipeline stages determined by the destination stage mask specified by dstStageMask.
* Within that, the second access scope only includes the second access scopes defined by elements of the pMemoryBarriers, pBufferMemoryBarriers and pImageMemoryBarriers arrays,
* which each define a set of memory barriers. If no memory barriers are specified, then the second access scope includes no accesses.
*
* If dependencyFlags includes VK_DEPENDENCY_BY_REGION_BIT, then any dependency between framebuffer-space pipeline stages is framebuffer-local - otherwise it is framebuffer-global.
*/
VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier(
VkCommandBuffer commandBuffer,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
VkDependencyFlags dependencyFlags,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
{
assert(commandBuffer);
//TODO pipeline stage flags
//VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
//VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
//VK_PIPELINE_STAGE_VERTEX_INPUT_BIT
//VK_PIPELINE_STAGE_VERTEX_SHADER_BIT
//VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
//VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT
//VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT
//VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
//VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
//VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
//VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
//VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
//VK_PIPELINE_STAGE_TRANSFER_BIT
//VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
//VK_PIPELINE_STAGE_HOST_BIT
//VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT
//VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
//TODO dependency flags
//VK_DEPENDENCY_BY_REGION_BIT,
//VK_DEPENDENCY_DEVICE_GROUP_BIT,
//VK_DEPENDENCY_VIEW_LOCAL_BIT
//TODO access flags
//VK_ACCESS_INDIRECT_COMMAND_READ_BIT
//VK_ACCESS_INDEX_READ_BIT
//VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
//VK_ACCESS_UNIFORM_READ_BIT
//VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
//VK_ACCESS_SHADER_READ_BIT
//VK_ACCESS_SHADER_WRITE_BIT
//VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
//VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
//VK_ACCESS_TRANSFER_READ_BIT
//VK_ACCESS_TRANSFER_WRITE_BIT
//VK_ACCESS_HOST_READ_BIT
//VK_ACCESS_HOST_WRITE_BIT
//VK_ACCESS_MEMORY_READ_BIT
//VK_ACCESS_MEMORY_WRITE_BIT
//VK_ACCESS_COMMAND_PROCESS_READ_BIT_NVX
//VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX
//TODO Layout transition flags
//VK_IMAGE_LAYOUT_UNDEFINED
//VK_IMAGE_LAYOUT_GENERAL
//VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
//VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
//VK_IMAGE_LAYOUT_PREINITIALIZED
//VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
//VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL
//VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
//VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR
for(int c = 0; c < memoryBarrierCount; ++c)
{
//TODO
}
for(int c = 0; c < bufferMemoryBarrierCount; ++c)
{
//TODO
}
for(int c = 0; c < imageMemoryBarrierCount; ++c)
{
_image* i = pImageMemoryBarriers[c].image;
assert(i->layout == pImageMemoryBarriers[c].oldLayout || i->layout == VK_IMAGE_LAYOUT_UNDEFINED);
if(srcStageMask & VK_PIPELINE_STAGE_TRANSFER_BIT &&
pImageMemoryBarriers[c].srcAccessMask & VK_ACCESS_TRANSFER_WRITE_BIT &&
i->needToClear)
{
//insert CRs to clear the image
assert(i->layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length);
clInsertTileBinningModeConfiguration(&commandBuffer->binCl,
0, 0, 0, 0,
getFormatBpp(i->format) == 64, //64 bit color mode
i->samples > 1, //msaa
i->width, i->height, 0, 0, 0);
//START_TILE_BINNING resets the statechange counters in the hardware,
//which are what is used when a primitive is binned to a tile to
//figure out what new state packets need to be written to that tile's
//command list.
clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length);
clInsertStartTileBinning(&commandBuffer->binCl);
//Reset the current compressed primitives format. This gets modified
//by VC4_PACKET_GL_INDEXED_PRIMITIVE and
//VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
//of every tile.
clFit(commandBuffer, &commandBuffer->binCl, V3D21_PRIMITIVE_LIST_FORMAT_length);
clInsertPrimitiveListFormat(&commandBuffer->binCl,
1, //16 bit
2); //tris
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, i->handle);
commandBuffer->submitCl.color_write.hindex = idx;
commandBuffer->submitCl.color_write.offset = 0;
commandBuffer->submitCl.color_write.flags = 0;
//TODO format
commandBuffer->submitCl.color_write.bits =
VC4_SET_FIELD(VC4_RENDER_CONFIG_FORMAT_RGBA8888, VC4_RENDER_CONFIG_FORMAT) |
VC4_SET_FIELD(i->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);
commandBuffer->submitCl.clear_color[0] = i->clearColor[0];
commandBuffer->submitCl.clear_color[1] = i->clearColor[1];
//TODO ranges
commandBuffer->submitCl.min_x_tile = 0;
commandBuffer->submitCl.min_y_tile = 0;
uint32_t tileSizeW = 64;
uint32_t tileSizeH = 64;
if(i->samples > 1)
{
tileSizeW >>= 1;
tileSizeH >>= 1;
}
if(getFormatBpp(i->format) == 64)
{
tileSizeH >>= 1;
}
uint32_t widthInTiles = divRoundUp(i->width, tileSizeW);
uint32_t heightInTiles = divRoundUp(i->height, tileSizeH);
commandBuffer->submitCl.max_x_tile = widthInTiles - 1;
commandBuffer->submitCl.max_y_tile = heightInTiles - 1;
commandBuffer->submitCl.width = i->width;
commandBuffer->submitCl.height = i->height;
commandBuffer->submitCl.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
commandBuffer->submitCl.clear_z = 0; //TODO
commandBuffer->submitCl.clear_s = 0;
}
//transition to new layout
i->layout = pImageMemoryBarriers[c].newLayout;
}
}
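
//Illustrative application-side sketch of the barrier the code above special-cases:
//after a transfer-stage clear, the image (currently in TRANSFER_DST_OPTIMAL) is
//transitioned for presentation. The command buffer and image handles are assumed
//to come from the usual creation calls.
static void exampleBarrierAfterClear(VkCommandBuffer cmdBuf, VkImage image)
{
	VkImageMemoryBarrier imb = {
		.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
		.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
		.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT,
		.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
		.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
		.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
		.image = image,
		.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 },
	};
	vkCmdPipelineBarrier(cmdBuf,
		VK_PIPELINE_STAGE_TRANSFER_BIT, //srcStageMask: the clear counts as a transfer
		VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, //dstStageMask
		0, //dependencyFlags
		0, 0, //no global memory barriers
		0, 0, //no buffer memory barriers
		1, &imb); //one image memory barrier
}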
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDeviceWaitIdle
* vkDeviceWaitIdle is equivalent to calling vkQueueWaitIdle for all queues owned by device.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle(
VkDevice device)
{
assert(device);
for(int c = 0; c < numQueueFamilies; ++c)
{
for(int d = 0; d < device->numQueues[c]; ++d)
{
uint64_t lastFinishedSeqno;
vc4_seqno_wait(controlFd, &lastFinishedSeqno, device->queues[c][d].lastEmitSeqno, WAIT_TIMEOUT_INFINITE);
}
}
return VK_SUCCESS;
}
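
//Illustrative sketch: the usual reason to call vkDeviceWaitIdle is to drain every queue
//(i.e. wait for each lastEmitSeqno above) before tearing objects down. Handles are
//assumed to have been created elsewhere.
static void exampleShutdown(VkDevice device, VkSwapchainKHR swapchain)
{
	vkDeviceWaitIdle(device); //no queue owned by device has pending work after this
	vkDestroySwapchainKHR(device, swapchain, 0);
	vkDestroyDevice(device, 0);
}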
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySemaphore
*/
VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore(
VkDevice device,
VkSemaphore semaphore,
const VkAllocationCallbacks* pAllocator)
{
assert(device);
assert(semaphore);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
sem_destroy((sem_t*)semaphore);
}

View File

@ -2,7 +2,7 @@
#include <vulkan/vulkan.h>
VkPhysicalDeviceLimits _limits =
static VkPhysicalDeviceLimits _limits =
{
//TODO these values might change
.maxImageDimension1D = 16384,
@ -113,7 +113,7 @@ VkPhysicalDeviceLimits _limits =
.nonCoherentAtomSize = 0x40
};
VkPhysicalDeviceFeatures _features =
static VkPhysicalDeviceFeatures _features =
{
//TODO this might change
.robustBufferAccess = 1,
@ -174,7 +174,7 @@ VkPhysicalDeviceFeatures _features =
};
#define numFeatures (sizeof(_features)/sizeof(VkBool32))
VkQueueFamilyProperties _queueFamilyProperties[] =
static VkQueueFamilyProperties _queueFamilyProperties[] =
{
{
.queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
@ -185,7 +185,7 @@ VkQueueFamilyProperties _queueFamilyProperties[] =
};
#define numQueueFamilies (sizeof(_queueFamilyProperties)/sizeof(VkQueueFamilyProperties))
VkSurfaceFormatKHR supportedSurfaceFormats[] =
static VkSurfaceFormatKHR supportedSurfaceFormats[] =
{
{
.format = VK_FORMAT_R8G8B8A8_UNORM,
@ -259,29 +259,3 @@ static VkExtensionProperties deviceExtensions[] =
}
};
#define numDeviceExtensions (sizeof(deviceExtensions) / sizeof(VkExtensionProperties))
int findInstanceExtension(char* name)
{
for(int c = 0; c < numInstanceExtensions; ++c)
{
if(strcmp(instanceExtensions[c].extensionName, name) == 0)
{
return c;
}
}
return -1;
}
int findDeviceExtension(char* name)
{
for(int c = 0; c < numDeviceExtensions; ++c)
{
if(strcmp(deviceExtensions[c].extensionName, name) == 0)
{
return c;
}
}
return -1;
}

381
driver/wsi.c Normal file
View File

@ -0,0 +1,381 @@
#include "common.h"
/*
* Implementation of our RPI specific "extension"
*/
VkResult vkCreateRpiSurfaceKHR(
VkInstance instance,
const VkRpiSurfaceCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSurfaceKHR* pSurface)
{
assert(instance);
//assert(pCreateInfo); //ignored for now
assert(pSurface);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
*pSurface = (VkSurfaceKHR)modeset_create(controlFd);
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySurfaceKHR
* Destroying a VkSurfaceKHR merely severs the connection between Vulkan and the native surface,
* and does not imply destroying the native surface, closing a window, or similar behavior
* (but we'll do so anyways...)
*/
VKAPI_ATTR void VKAPI_CALL vkDestroySurfaceKHR(
VkInstance instance,
VkSurfaceKHR surface,
const VkAllocationCallbacks* pAllocator)
{
assert(instance);
assert(surface);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
modeset_destroy(controlFd, (modeset_dev*)surface);
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceCapabilitiesKHR
* The capabilities of a swapchain targeting a surface are the intersection of the capabilities of the WSI platform,
* the native window or display, and the physical device. The resulting capabilities can be obtained with the queries listed
* below in this section. Capabilities that correspond to image creation parameters are not independent of each other:
* combinations of parameters that are not supported as reported by vkGetPhysicalDeviceImageFormatProperties are not supported
* by the surface on that physical device, even if the capabilities taken individually are supported as part of some other parameter combinations.
*
* capabilities the specified device supports for a swapchain created for the surface
*/
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilitiesKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities)
{
assert(physicalDevice);
assert(surface);
assert(pSurfaceCapabilities);
pSurfaceCapabilities->minImageCount = 1; //min 1
pSurfaceCapabilities->maxImageCount = 2; //TODO max 2 for double buffering for now...
pSurfaceCapabilities->currentExtent.width = ((modeset_dev*)surface)->width;
pSurfaceCapabilities->currentExtent.height = ((modeset_dev*)surface)->height;
pSurfaceCapabilities->minImageExtent.width = ((modeset_dev*)surface)->width; //TODO
pSurfaceCapabilities->minImageExtent.height = ((modeset_dev*)surface)->height; //TODO
pSurfaceCapabilities->maxImageExtent.width = ((modeset_dev*)surface)->width; //TODO
pSurfaceCapabilities->maxImageExtent.height = ((modeset_dev*)surface)->height; //TODO
pSurfaceCapabilities->maxImageArrayLayers = 1; //TODO maybe more layers for cursor etc.
pSurfaceCapabilities->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; //TODO no rotation for now
pSurfaceCapabilities->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; //TODO get this from dev
pSurfaceCapabilities->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; //TODO no alpha compositing for now
pSurfaceCapabilities->supportedUsageFlags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; //well we want to draw on the screen right
return VK_SUCCESS;
}
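
//Illustrative application-side sketch of consuming the capabilities above: pick an image
//count and extent that fit the reported limits. With this implementation the ranges are
//degenerate (min == max == current), but the clamping pattern is the portable one.
static void exampleChooseSwapchainParams(VkPhysicalDevice pd, VkSurfaceKHR surface,
	uint32_t* imageCount, VkExtent2D* extent)
{
	VkSurfaceCapabilitiesKHR caps;
	vkGetPhysicalDeviceSurfaceCapabilitiesKHR(pd, surface, &caps);
	*imageCount = caps.minImageCount + 1; //ask for one more than the minimum
	if(caps.maxImageCount > 0 && *imageCount > caps.maxImageCount)
	{
		*imageCount = caps.maxImageCount;
	}
	*extent = caps.currentExtent; //the display dictates the size here
}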
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceFormatsKHR
* If pSurfaceFormats is NULL, then the number of format pairs supported for the given surface is returned in pSurfaceFormatCount.
* The number of format pairs supported will be greater than or equal to 1. Otherwise, pSurfaceFormatCount must point to a variable
* set by the user to the number of elements in the pSurfaceFormats array, and on return the variable is overwritten with the number
* of structures actually written to pSurfaceFormats. If the value of pSurfaceFormatCount is less than the number of format pairs supported,
* at most pSurfaceFormatCount structures will be written. If pSurfaceFormatCount is smaller than the number of format pairs supported for the given surface,
* VK_INCOMPLETE will be returned instead of VK_SUCCESS to indicate that not all the available values were returned.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormatsKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
uint32_t* pSurfaceFormatCount,
VkSurfaceFormatKHR* pSurfaceFormats)
{
assert(physicalDevice);
assert(surface);
assert(pSurfaceFormatCount);
const int numFormats = 1;
if(!pSurfaceFormats)
{
*pSurfaceFormatCount = numFormats;
return VK_SUCCESS;
}
int arraySize = *pSurfaceFormatCount;
int elementsWritten = min(numFormats, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pSurfaceFormats[c] = supportedSurfaceFormats[c];
}
*pSurfaceFormatCount = elementsWritten;
if(elementsWritten < numFormats)
{
return VK_INCOMPLETE;
}
return VK_SUCCESS;
}
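
//Illustrative sketch of the two-call enumeration idiom described above, as an application
//would drive it (error handling kept minimal, handles assumed valid).
static VkSurfaceFormatKHR examplePickSurfaceFormat(VkPhysicalDevice pd, VkSurfaceKHR surface)
{
	uint32_t count = 0;
	vkGetPhysicalDeviceSurfaceFormatsKHR(pd, surface, &count, 0); //first call: get the count
	VkSurfaceFormatKHR* formats = malloc(sizeof(VkSurfaceFormatKHR) * count);
	vkGetPhysicalDeviceSurfaceFormatsKHR(pd, surface, &count, formats); //second call: fill the array
	VkSurfaceFormatKHR chosen = formats[0]; //this implementation currently exposes a single format
	free(formats);
	return chosen;
}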
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfacePresentModesKHR
* If pPresentModes is NULL, then the number of presentation modes supported for the given surface is returned in pPresentModeCount.
* Otherwise, pPresentModeCount must point to a variable set by the user to the number of elements in the pPresentModes array,
* and on return the variable is overwritten with the number of values actually written to pPresentModes.
* If the value of pPresentModeCount is less than the number of presentation modes supported, at most pPresentModeCount values will be written.
* If pPresentModeCount is smaller than the number of presentation modes supported for the given surface, VK_INCOMPLETE will be returned instead of
* VK_SUCCESS to indicate that not all the available values were returned.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
uint32_t* pPresentModeCount,
VkPresentModeKHR* pPresentModes)
{
assert(physicalDevice);
assert(surface);
assert(pPresentModeCount);
const int numModes = 1;
if(!pPresentModes)
{
*pPresentModeCount = numModes;
return VK_SUCCESS;
}
int arraySize = *pPresentModeCount;
int elementsWritten = min(numModes, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
//TODO
pPresentModes[c] = VK_PRESENT_MODE_FIFO_KHR;
}
*pPresentModeCount = elementsWritten;
if(elementsWritten < numModes)
{
return VK_INCOMPLETE;
}
return VK_SUCCESS;
}
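
//Illustrative sketch of picking a present mode with the same two-call idiom; it falls back
//to FIFO, which is the only mode this implementation reports and the one mode guaranteed
//to be available.
static VkPresentModeKHR examplePickPresentMode(VkPhysicalDevice pd, VkSurfaceKHR surface)
{
	uint32_t count = 0;
	vkGetPhysicalDeviceSurfacePresentModesKHR(pd, surface, &count, 0);
	VkPresentModeKHR* modes = malloc(sizeof(VkPresentModeKHR) * count);
	vkGetPhysicalDeviceSurfacePresentModesKHR(pd, surface, &count, modes);
	VkPresentModeKHR chosen = VK_PRESENT_MODE_FIFO_KHR;
	for(uint32_t c = 0; c < count; ++c)
	{
		if(modes[c] == VK_PRESENT_MODE_MAILBOX_KHR)
		{
			chosen = modes[c]; //prefer mailbox if it is ever exposed
		}
	}
	free(modes);
	return chosen;
}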
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSwapchainKHR
*/
VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR(
VkDevice device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSwapchainKHR* pSwapchain)
{
assert(device);
assert(pCreateInfo);
assert(pSwapchain);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
*pSwapchain = malloc(sizeof(_swapchain));
if(!*pSwapchain)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
_swapchain* s = *pSwapchain;
//TODO flags, layers, queue sharing, pretransform, composite alpha, present mode..., clipped, oldswapchain
//TODO external sync on surface, oldswapchain
s->images = malloc(sizeof(_image) * pCreateInfo->minImageCount);
if(!s->images)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
s->backbufferIdx = 0;
s->numImages = pCreateInfo->minImageCount;
s->surface = pCreateInfo->surface;
for(int c = 0; c < pCreateInfo->minImageCount; ++c)
{
s->images[c].width = pCreateInfo->imageExtent.width;
s->images[c].height = pCreateInfo->imageExtent.height;
s->images[c].depth = 1;
s->images[c].layers = pCreateInfo->imageArrayLayers;
s->images[c].miplevels = 1;
s->images[c].samples = 1; //TODO
s->images[c].usageBits = pCreateInfo->imageUsage;
s->images[c].format = pCreateInfo->imageFormat;
s->images[c].imageSpace = pCreateInfo->imageColorSpace;
s->images[c].concurrentAccess = pCreateInfo->imageSharingMode;
s->images[c].numQueueFamiliesWithAccess = pCreateInfo->queueFamilyIndexCount;
if(s->images[c].concurrentAccess)
{
s->images[c].queueFamiliesWithAccess = malloc(sizeof(uint32_t)*s->images[c].numQueueFamiliesWithAccess);
memcpy(s->images[c].queueFamiliesWithAccess, pCreateInfo->pQueueFamilyIndices, sizeof(uint32_t)*s->images[c].numQueueFamiliesWithAccess);
}
s->images[c].preTransformMode = pCreateInfo->preTransform;
s->images[c].compositeAlpha = pCreateInfo->compositeAlpha;
s->images[c].presentMode = pCreateInfo->presentMode;
s->images[c].clipped = pCreateInfo->clipped;
createImageBO(&s->images[c]);
int res = modeset_create_fb(controlFd, &s->images[c]); assert(res == 0);
}
//defer to first swapbuffer (or at least later, getting swapchain != presenting immediately)
//int res = modeset_fb_for_dev(controlFd, s->surface, &s->images[s->backbufferIdx]); assert(res == 0);
return VK_SUCCESS;
}
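
//Illustrative application-side sketch of filling VkSwapchainCreateInfoKHR for the
//exclusive-sharing, FIFO, double-buffered case handled above. The format and extent are
//assumed to come from the surface queries.
static VkSwapchainKHR exampleCreateSwapchain(VkDevice device, VkSurfaceKHR surface,
	VkSurfaceFormatKHR format, VkExtent2D extent)
{
	VkSwapchainCreateInfoKHR ci = {
		.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
		.surface = surface,
		.minImageCount = 2, //double buffering, matches the surface capabilities above
		.imageFormat = format.format,
		.imageColorSpace = format.colorSpace,
		.imageExtent = extent,
		.imageArrayLayers = 1,
		.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
		.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
		.preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
		.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
		.presentMode = VK_PRESENT_MODE_FIFO_KHR,
		.clipped = VK_TRUE,
	};
	VkSwapchainKHR swapchain = VK_NULL_HANDLE;
	VkResult res = vkCreateSwapchainKHR(device, &ci, 0, &swapchain);
	assert(res == VK_SUCCESS);
	return swapchain;
}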
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetSwapchainImagesKHR
* If pSwapchainImages is NULL, then the number of presentable images for swapchain is returned in pSwapchainImageCount.
* Otherwise, pSwapchainImageCount must point to a variable set by the user to the number of elements in the pSwapchainImages array,
* and on return the variable is overwritten with the number of structures actually written to pSwapchainImages.
* If the value of pSwapchainImageCount is less than the number of presentable images for swapchain, at most pSwapchainImageCount structures will be written.
* If pSwapchainImageCount is smaller than the number of presentable images for swapchain, VK_INCOMPLETE will be returned instead of VK_SUCCESS to
* indicate that not all the available values were returned.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainImagesKHR(
VkDevice device,
VkSwapchainKHR swapchain,
uint32_t* pSwapchainImageCount,
VkImage* pSwapchainImages)
{
assert(device);
assert(swapchain);
assert(pSwapchainImageCount);
_swapchain* s = swapchain;
if(!pSwapchainImages)
{
*pSwapchainImageCount = s->numImages;
return VK_SUCCESS;
}
int arraySize = *pSwapchainImageCount;
int elementsWritten = min(s->numImages, arraySize);
for(int c = 0; c < elementsWritten; ++c)
{
pSwapchainImages[c] = &s->images[c];
}
*pSwapchainImageCount = elementsWritten;
if(elementsWritten < s->numImages)
{
return VK_INCOMPLETE;
}
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkAcquireNextImageKHR
*/
VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR(
VkDevice device,
VkSwapchainKHR swapchain,
uint64_t timeout,
VkSemaphore semaphore,
VkFence fence,
uint32_t* pImageIndex)
{
assert(device);
assert(swapchain);
assert(semaphore != VK_NULL_HANDLE || fence != VK_NULL_HANDLE);
sem_t* s = semaphore;
//TODO we need to keep track of currently acquired images?
//TODO wait timeout?
*pImageIndex = ((_swapchain*)swapchain)->backbufferIdx; //return back buffer index
//signal semaphore
int semVal; sem_getvalue(s, &semVal); assert(semVal <= 0); //make sure semaphore is unsignalled
sem_post(s);
//TODO signal fence
return VK_SUCCESS;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkQueuePresentKHR
* Any writes to memory backing the images referenced by the pImageIndices and pSwapchains members of pPresentInfo,
* that are available before vkQueuePresentKHR is executed, are automatically made visible to the read access performed by the presentation engine.
* This automatic visibility operation for an image happens-after the semaphore signal operation, and happens-before the presentation engine accesses the image.
* Queueing an image for presentation defines a set of queue operations, including waiting on the semaphores and submitting a presentation request to the presentation engine.
* However, the scope of this set of queue operations does not include the actual processing of the image by the presentation engine.
* If vkQueuePresentKHR fails to enqueue the corresponding set of queue operations, it may return VK_ERROR_OUT_OF_HOST_MEMORY or VK_ERROR_OUT_OF_DEVICE_MEMORY.
* If it does, the implementation must ensure that the state and contents of any resources or synchronization primitives referenced is unaffected by the call or its failure.
* If vkQueuePresentKHR fails in such a way that the implementation is unable to make that guarantee, the implementation must return VK_ERROR_DEVICE_LOST.
* However, if the presentation request is rejected by the presentation engine with an error VK_ERROR_OUT_OF_DATE_KHR or VK_ERROR_SURFACE_LOST_KHR,
* the set of queue operations are still considered to be enqueued and thus any semaphore to be waited on gets unsignaled when the corresponding queue operation is complete.
*/
VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR(
VkQueue queue,
const VkPresentInfoKHR* pPresentInfo)
{
assert(queue);
assert(pPresentInfo);
//wait for the semaphores in the present info (signalled by the submit ioctl) to make sure the CLs have been flushed
for(int c = 0; c < pPresentInfo->waitSemaphoreCount; ++c)
{
sem_wait((sem_t*)pPresentInfo->pWaitSemaphores[c]);
}
for(int c = 0; c < pPresentInfo->swapchainCount; ++c)
{
_swapchain* s = pPresentInfo->pSwapchains[c];
modeset_present_buffer(controlFd, (modeset_dev*)s->surface, &s->images[s->backbufferIdx]);
s->backbufferIdx = (s->backbufferIdx + 1) % s->numImages;
}
return VK_SUCCESS;
}
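
//Illustrative per-frame sketch of the acquire -> submit -> present sequence that the
//semaphore handling above and in vkAcquireNextImageKHR is written against. Command buffer
//recording is omitted and all handles are assumed to exist already.
static void exampleFrame(VkDevice device, VkQueue queue, VkSwapchainKHR swapchain,
	VkCommandBuffer cmdBuf, VkSemaphore imageAvailable, VkSemaphore renderFinished)
{
	uint32_t imageIndex = 0;
	vkAcquireNextImageKHR(device, swapchain, UINT64_MAX, imageAvailable, VK_NULL_HANDLE, &imageIndex);
	VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
	VkSubmitInfo si = {
		.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
		.waitSemaphoreCount = 1,
		.pWaitSemaphores = &imageAvailable,
		.pWaitDstStageMask = &waitStage,
		.commandBufferCount = 1,
		.pCommandBuffers = &cmdBuf,
		.signalSemaphoreCount = 1,
		.pSignalSemaphores = &renderFinished,
	};
	vkQueueSubmit(queue, 1, &si, VK_NULL_HANDLE);
	VkPresentInfoKHR pi = {
		.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
		.waitSemaphoreCount = 1,
		.pWaitSemaphores = &renderFinished, //vkQueuePresentKHR above waits on this
		.swapchainCount = 1,
		.pSwapchains = &swapchain,
		.pImageIndices = &imageIndex,
	};
	vkQueuePresentKHR(queue, &pi);
}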
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySwapchainKHR
*/
VKAPI_ATTR void VKAPI_CALL vkDestroySwapchainKHR(
VkDevice device,
VkSwapchainKHR swapchain,
const VkAllocationCallbacks* pAllocator)
{
assert(device);
assert(swapchain);
//TODO: allocator is ignored for now
assert(pAllocator == 0);
//TODO flush all ops
_swapchain* s = swapchain;
for(int c = 0; c < s->numImages; ++c)
{
vc4_bo_free(controlFd, s->images[c].handle, 0, s->images[c].size);
modeset_destroy_fb(controlFd, &s->images[c]);
}
free(s->images);
free(s);
}