mirror of
https://github.com/Yours3lf/rpi-vk-driver.git
synced 2025-01-18 10:52:14 +01:00
split driver.c into multiple files
This commit is contained in:
parent
0c3e0f798b
commit
2dee7f9439
31
driver/AlignedAllocator.c
Normal file
31
driver/AlignedAllocator.c
Normal file
@ -0,0 +1,31 @@
|
||||
#include "AlignedAllocator.h"
|
||||
|
||||
/*
 * Allocates `bytes` bytes whose start address is a multiple of `alignment`.
 *
 * Returns a null pointer when `bytes` is zero, when `bytes` exceeds the 1GB
 * cap, or when posix_memalign() reports an error. Release the result with
 * alignedFree().
 */
void* alignedAlloc( unsigned bytes, unsigned alignment )
{
    const unsigned maxBytes = 1024 * 1024 * 1024; //1GB is max on RPi

    //reject empty and oversized requests before touching the allocator
    if( bytes == 0 || bytes > maxBytes )
    {
        return 0;
    }

    void* block = 0;

    //posix_memalign returns non-zero on failure
    return posix_memalign( &block, alignment, bytes ) == 0 ? block : 0;
}
|
||||
|
||||
/*
 * Releases memory obtained from alignedAlloc() by handing it to free().
 */
void alignedFree( void* p )
{
    free( p );
}
|
||||
|
@ -6,34 +6,8 @@ extern "C" {
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * Allocates `bytes` bytes whose start address is a multiple of `alignment`.
 *
 * Returns a null pointer when `bytes` is zero, when `bytes` exceeds the 1GB
 * cap, or when posix_memalign() reports an error. Release the result with
 * alignedFree().
 */
void* alignedAlloc( unsigned bytes, unsigned alignment )
{
    const unsigned maxBytes = 1024 * 1024 * 1024; //1GB is max on RPi

    //reject empty and oversized requests before touching the allocator
    if( bytes == 0 || bytes > maxBytes )
    {
        return 0;
    }

    void* block = 0;

    //posix_memalign returns non-zero on failure
    return posix_memalign( &block, alignment, bytes ) == 0 ? block : 0;
}
|
||||
|
||||
/*
 * Releases memory obtained from alignedAlloc() by handing it to free().
 */
void alignedFree( void* p )
{
    free( p );
}
|
||||
void* alignedAlloc( unsigned bytes, unsigned alignment );
|
||||
void alignedFree( void* p );
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
157
driver/ConsecutivePoolAllocator.c
Normal file
157
driver/ConsecutivePoolAllocator.c
Normal file
@ -0,0 +1,157 @@
|
||||
#include "ConsecutivePoolAllocator.h"
|
||||
|
||||
#include "CustomAssert.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
ConsecutivePoolAllocator createConsecutivePoolAllocator(char* b, unsigned bs, unsigned s)
|
||||
{
|
||||
assert(b); //only allocated memory
|
||||
assert(bs >= sizeof(void*)); //we need to be able to store
|
||||
assert(s%bs==0); //we want a size that is the exact multiple of block size
|
||||
assert(s > bs); //at least 1 element
|
||||
|
||||
ConsecutivePoolAllocator pa =
|
||||
{
|
||||
.buf = b,
|
||||
.nextFreeBlock = (uint32_t*)b,
|
||||
.blockSize = bs,
|
||||
.size = s
|
||||
};
|
||||
|
||||
//initialize linked list of free pointers
|
||||
uint32_t* ptr = pa.nextFreeBlock;
|
||||
unsigned last = s/bs - 1;
|
||||
for(unsigned c = 0; c < last; ++c)
|
||||
{
|
||||
*ptr = (uint32_t)ptr + bs;
|
||||
ptr += bs/4;
|
||||
}
|
||||
|
||||
*ptr = 0; //last element
|
||||
|
||||
return pa;
|
||||
}
|
||||
|
||||
/*
 * Resets every field of the allocator to 0. The backing buffer itself is not
 * released here.
 */
void destroyConsecutivePoolAllocator(ConsecutivePoolAllocator* pa)
{
    //actual memory freeing is done by caller
    pa->size = 0;
    pa->blockSize = 0;
    pa->nextFreeBlock = 0;
    pa->buf = 0;
}
|
||||
|
||||
//allocate numBlocks consecutive memory
|
||||
void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks)
|
||||
{
|
||||
assert(pa->buf);
|
||||
|
||||
if(!pa->nextFreeBlock)
|
||||
{
|
||||
return 0; //no free blocks
|
||||
}
|
||||
|
||||
void* ret = 0;
|
||||
for(uint32_t* candidate = pa->nextFreeBlock; candidate; candidate = (uint32_t*)*candidate)
|
||||
{
|
||||
uint32_t found = 1;
|
||||
uint32_t* prevBlock = candidate;
|
||||
uint32_t* blockAfterCandidate = (uint32_t*)*candidate;
|
||||
//check if there are enough consecutive free blocks
|
||||
for(uint32_t c = 0; c < numBlocks - 1; ++c)
|
||||
{
|
||||
if(blockAfterCandidate - prevBlock != pa->blockSize)
|
||||
{
|
||||
//signal if not consecutive (ie. diff is greater than blocksize)
|
||||
found = 0;
|
||||
break;
|
||||
}
|
||||
prevBlock = blockAfterCandidate;
|
||||
blockAfterCandidate = (uint32_t*)*blockAfterCandidate;
|
||||
}
|
||||
|
||||
//numblocks consecutive blocks found
|
||||
if(found)
|
||||
{
|
||||
ret = candidate;
|
||||
if(pa->nextFreeBlock == candidate)
|
||||
{
|
||||
//candidate found immediately
|
||||
pa->nextFreeBlock = blockAfterCandidate;
|
||||
}
|
||||
else
|
||||
{
|
||||
//somewhere the linked list would point to candidate, we need to correct this
|
||||
for(uint32_t* nextFreeBlockCandidate = pa->nextFreeBlock; nextFreeBlockCandidate; nextFreeBlockCandidate = (uint32_t*)*nextFreeBlockCandidate)
|
||||
{
|
||||
if((uint32_t*)*nextFreeBlockCandidate == candidate)
|
||||
{
|
||||
*nextFreeBlockCandidate = (uint32_t)blockAfterCandidate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
//free numBlocks consecutive memory
|
||||
void consecutivePoolFree(ConsecutivePoolAllocator* pa, void* p, uint32_t numBlocks)
|
||||
{
|
||||
assert(pa->buf);
|
||||
assert(p);
|
||||
|
||||
if((void*)pa->nextFreeBlock > p)
|
||||
{
|
||||
for(uint32_t c = 0; c < numBlocks - 1; ++c)
|
||||
{
|
||||
//set each allocated block to form a linked list
|
||||
*(uint32_t*)((char*)p + c * pa->blockSize) = (uint32_t)((char*)p + (c + 1) * pa->blockSize);
|
||||
}
|
||||
//set last block to point to the next free
|
||||
*(uint32_t*)((char*)p + (numBlocks - 1) * pa->blockSize) = (uint32_t)pa->nextFreeBlock;
|
||||
//set next free to the newly freed block
|
||||
pa->nextFreeBlock = p;
|
||||
return;
|
||||
}
|
||||
|
||||
//somewhere the linked list may point after the free block (or null), we need to correct this
|
||||
for(uint32_t* nextFreeBlockCandidate = pa->nextFreeBlock; nextFreeBlockCandidate; nextFreeBlockCandidate = (uint32_t*)*nextFreeBlockCandidate)
|
||||
{
|
||||
if((void*)*nextFreeBlockCandidate > p || !*nextFreeBlockCandidate)
|
||||
{
|
||||
for(uint32_t c = 0; c < numBlocks - 1; ++c)
|
||||
{
|
||||
//set each allocated block to form a linked list
|
||||
*(uint32_t*)((char*)p + c * pa->blockSize) = (uint32_t)((char*)p + (c + 1) * pa->blockSize);
|
||||
}
|
||||
//set last block to point to the next free
|
||||
*(uint32_t*)((char*)p + (numBlocks - 1) * pa->blockSize) = *nextFreeBlockCandidate;
|
||||
|
||||
*nextFreeBlockCandidate = (uint32_t)p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//if there's a block free after the current block, it just allocates one more block
|
||||
//else it frees current block and allocates a new one
|
||||
void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks)
|
||||
{
|
||||
if(pa->nextFreeBlock == (uint32_t*)((char*)currentMem + currNumBlocks * pa->blockSize))
|
||||
{
|
||||
//we have one more block after current one, so just expand current
|
||||
pa->nextFreeBlock = (uint32_t*)*pa->nextFreeBlock;
|
||||
return currentMem;
|
||||
}
|
||||
else
|
||||
{
|
||||
void* ret = consecutivePoolAllocate(pa, currNumBlocks + 1);
|
||||
consecutivePoolFree(pa, currentMem, currNumBlocks);
|
||||
return ret;
|
||||
}
|
||||
}
|
@ -16,157 +16,11 @@ typedef struct ConsecutivePoolAllocator
|
||||
unsigned size; //size is exact multiple of block size
|
||||
} ConsecutivePoolAllocator;
|
||||
|
||||
ConsecutivePoolAllocator createConsecutivePoolAllocator(char* b, unsigned bs, unsigned s)
|
||||
{
|
||||
assert(b); //only allocated memory
|
||||
assert(bs >= sizeof(void*)); //we need to be able to store
|
||||
assert(s%bs==0); //we want a size that is the exact multiple of block size
|
||||
assert(s > bs); //at least 1 element
|
||||
|
||||
ConsecutivePoolAllocator pa =
|
||||
{
|
||||
.buf = b,
|
||||
.nextFreeBlock = (uint32_t*)b,
|
||||
.blockSize = bs,
|
||||
.size = s
|
||||
};
|
||||
|
||||
//initialize linked list of free pointers
|
||||
uint32_t* ptr = pa.nextFreeBlock;
|
||||
unsigned last = s/bs - 1;
|
||||
for(unsigned c = 0; c < last; ++c)
|
||||
{
|
||||
*ptr = (uint32_t)ptr + bs;
|
||||
ptr += bs/4;
|
||||
}
|
||||
|
||||
*ptr = 0; //last element
|
||||
|
||||
return pa;
|
||||
}
|
||||
|
||||
/*
 * Resets every field of the allocator to 0. The backing buffer itself is not
 * released here.
 */
void destroyConsecutivePoolAllocator(ConsecutivePoolAllocator* pa)
{
    //actual memory freeing is done by caller
    pa->size = 0;
    pa->blockSize = 0;
    pa->nextFreeBlock = 0;
    pa->buf = 0;
}
|
||||
|
||||
//allocate numBlocks consecutive memory
|
||||
void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks)
|
||||
{
|
||||
assert(pa->buf);
|
||||
|
||||
if(!pa->nextFreeBlock)
|
||||
{
|
||||
return 0; //no free blocks
|
||||
}
|
||||
|
||||
void* ret = 0;
|
||||
for(uint32_t* candidate = pa->nextFreeBlock; candidate; candidate = (uint32_t*)*candidate)
|
||||
{
|
||||
uint32_t found = 1;
|
||||
uint32_t* prevBlock = candidate;
|
||||
uint32_t* blockAfterCandidate = (uint32_t*)*candidate;
|
||||
//check if there are enough consecutive free blocks
|
||||
for(uint32_t c = 0; c < numBlocks - 1; ++c)
|
||||
{
|
||||
if(blockAfterCandidate - prevBlock != pa->blockSize)
|
||||
{
|
||||
//signal if not consecutive (ie. diff is greater than blocksize)
|
||||
found = 0;
|
||||
break;
|
||||
}
|
||||
prevBlock = blockAfterCandidate;
|
||||
blockAfterCandidate = (uint32_t*)*blockAfterCandidate;
|
||||
}
|
||||
|
||||
//numblocks consecutive blocks found
|
||||
if(found)
|
||||
{
|
||||
ret = candidate;
|
||||
if(pa->nextFreeBlock == candidate)
|
||||
{
|
||||
//candidate found immediately
|
||||
pa->nextFreeBlock = blockAfterCandidate;
|
||||
}
|
||||
else
|
||||
{
|
||||
//somewhere the linked list would point to candidate, we need to correct this
|
||||
for(uint32_t* nextFreeBlockCandidate = pa->nextFreeBlock; nextFreeBlockCandidate; nextFreeBlockCandidate = (uint32_t*)*nextFreeBlockCandidate)
|
||||
{
|
||||
if((uint32_t*)*nextFreeBlockCandidate == candidate)
|
||||
{
|
||||
*nextFreeBlockCandidate = (uint32_t)blockAfterCandidate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
//free numBlocks consecutive memory
|
||||
void consecutivePoolFree(ConsecutivePoolAllocator* pa, void* p, uint32_t numBlocks)
|
||||
{
|
||||
assert(pa->buf);
|
||||
assert(p);
|
||||
|
||||
if((void*)pa->nextFreeBlock > p)
|
||||
{
|
||||
for(uint32_t c = 0; c < numBlocks - 1; ++c)
|
||||
{
|
||||
//set each allocated block to form a linked list
|
||||
*(uint32_t*)((char*)p + c * pa->blockSize) = (uint32_t)((char*)p + (c + 1) * pa->blockSize);
|
||||
}
|
||||
//set last block to point to the next free
|
||||
*(uint32_t*)((char*)p + (numBlocks - 1) * pa->blockSize) = (uint32_t)pa->nextFreeBlock;
|
||||
//set next free to the newly freed block
|
||||
pa->nextFreeBlock = p;
|
||||
return;
|
||||
}
|
||||
|
||||
//somewhere the linked list may point after the free block (or null), we need to correct this
|
||||
for(uint32_t* nextFreeBlockCandidate = pa->nextFreeBlock; nextFreeBlockCandidate; nextFreeBlockCandidate = (uint32_t*)*nextFreeBlockCandidate)
|
||||
{
|
||||
if((void*)*nextFreeBlockCandidate > p || !*nextFreeBlockCandidate)
|
||||
{
|
||||
for(uint32_t c = 0; c < numBlocks - 1; ++c)
|
||||
{
|
||||
//set each allocated block to form a linked list
|
||||
*(uint32_t*)((char*)p + c * pa->blockSize) = (uint32_t)((char*)p + (c + 1) * pa->blockSize);
|
||||
}
|
||||
//set last block to point to the next free
|
||||
*(uint32_t*)((char*)p + (numBlocks - 1) * pa->blockSize) = *nextFreeBlockCandidate;
|
||||
|
||||
*nextFreeBlockCandidate = (uint32_t)p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//if there's a block free after the current block, it just allocates one more block
|
||||
//else it frees current block and allocates a new one
|
||||
void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks)
|
||||
{
|
||||
if(pa->nextFreeBlock == (uint32_t*)((char*)currentMem + currNumBlocks * pa->blockSize))
|
||||
{
|
||||
//we have one more block after current one, so just expand current
|
||||
pa->nextFreeBlock = (uint32_t*)*pa->nextFreeBlock;
|
||||
return currentMem;
|
||||
}
|
||||
else
|
||||
{
|
||||
void* ret = consecutivePoolAllocate(pa, currNumBlocks + 1);
|
||||
consecutivePoolFree(pa, currentMem, currNumBlocks);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
ConsecutivePoolAllocator createConsecutivePoolAllocator(char* b, unsigned bs, unsigned s);
|
||||
void destroyConsecutivePoolAllocator(ConsecutivePoolAllocator* pa);
|
||||
void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks);
|
||||
void consecutivePoolFree(ConsecutivePoolAllocator* pa, void* p, uint32_t numBlocks);
|
||||
void* consecutivePoolReAllocate(ConsecutivePoolAllocator* pa, void* currentMem, uint32_t currNumBlocks);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
715
driver/ControlListUtil.c
Normal file
715
driver/ControlListUtil.c
Normal file
@ -0,0 +1,715 @@
|
||||
#include "ControlListUtil.h"

#include <stdint.h>
#include <string.h>
|
||||
|
||||
/*
 * Divides n by d and rounds the result up to the next whole number.
 * d must be non-zero.
 */
uint32_t divRoundUp(uint32_t n, uint32_t d)
{
    //quotient plus one for any remainder; unlike (n + d - 1) / d this cannot
    //wrap around when n is close to UINT32_MAX
    return n / d + (n % d != 0);
}
|
||||
|
||||
/*
 * Places the lowest `bits` bits of `d` at bit position `offset` of the
 * result; every other bit of the result is 0.
 *
 * All arithmetic is done on unsigned 32-bit values, and `bits >= 32` /
 * `offset >= 32` are handled explicitly, so no shift is ever undefined.
 */
uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset)
{
    if(offset >= 32)
    {
        return 0; //the whole field is shifted out of the 32-bit word
    }

    const uint32_t mask = bits >= 32 ? UINT32_MAX : ((UINT32_C(1) << bits) - 1);
    return (d << offset) & (mask << offset);
}
|
||||
|
||||
/*
 * Returns the distance between the start of the control list buffer and its
 * write cursor (nextFreeByte), i.e. how much of the list is in use.
 */
uint32_t clSize(ControlList* cl)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    const uint32_t used = cl->nextFreeByte - cl->buffer;
    return used;
}
|
||||
|
||||
/*
 * Reports whether `size` more bytes fit into the control list.
 *
 * Returns 1 when the current size plus `size` stays below CONTROL_LIST_SIZE,
 * otherwise 0 (the list has to be reallocated first).
 */
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size)
{
    const uint32_t used = clSize(cl);

    //1: fits, 0: need to reallocate
    return (used + size < CONTROL_LIST_SIZE) ? 1 : 0;
}
|
||||
|
||||
|
||||
/*
 * Initializes a control list over `buffer`: the list starts with one block
 * and its write cursor is placed at the beginning of the buffer.
 */
void clInit(ControlList* cl, void* buffer)
{
    assert(cl);
    assert(buffer);

    cl->numBlocks = 1;
    cl->buffer = buffer;
    cl->nextFreeByte = cl->buffer; //nothing written yet
}
|
||||
|
||||
/*
 * Writes the HALT opcode at the write cursor and advances the cursor by one.
 */
void clInsertHalt(ControlList* cl)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    *cl->nextFreeByte++ = V3D21_HALT_opcode;
}
|
||||
|
||||
/*
 * Writes the NOP opcode at the write cursor and advances the cursor by one.
 */
void clInsertNop(ControlList* cl)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    *cl->nextFreeByte++ = V3D21_NOP_opcode;
}
|
||||
|
||||
/*
 * Writes the FLUSH opcode at the write cursor and advances the cursor by one.
 */
void clInsertFlush(ControlList* cl)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    *cl->nextFreeByte++ = V3D21_FLUSH_opcode;
}
|
||||
|
||||
/*
 * Writes the FLUSH_ALL_STATE opcode at the write cursor and advances the
 * cursor by one.
 */
void clInsertFlushAllState(ControlList* cl)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    *cl->nextFreeByte++ = V3D21_FLUSH_ALL_STATE_opcode;
}
|
||||
|
||||
/*
 * Writes the START_TILE_BINNING opcode at the write cursor and advances the
 * cursor by one.
 */
void clInsertStartTileBinning(ControlList* cl)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    *cl->nextFreeByte++ = V3D21_START_TILE_BINNING_opcode;
}
|
||||
|
||||
/*
 * Writes the INCREMENT_SEMAPHORE opcode at the write cursor and advances the
 * cursor by one.
 */
void clInsertIncrementSemaphore(ControlList* cl)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    *cl->nextFreeByte++ = V3D21_INCREMENT_SEMAPHORE_opcode;
}
|
||||
|
||||
/*
 * Writes the WAIT_ON_SEMAPHORE opcode at the write cursor and advances the
 * cursor by one.
 */
void clInsertWaitOnSemaphore(ControlList* cl)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    *cl->nextFreeByte++ = V3D21_WAIT_ON_SEMAPHORE_opcode;
}
|
||||
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertBranch(ControlList* cls, ControlListAddress address)
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_BRANCH_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
|
||||
}
|
||||
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertBranchToSubList(ControlList* cls, ControlListAddress address)
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_BRANCH_TO_SUB_LIST_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
|
||||
}
|
||||
|
||||
/*
 * Writes the RETURN_FROM_SUB_LIST opcode at the write cursor and advances
 * the cursor by one.
 */
void clInsertReturnFromSubList(ControlList* cl)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    *cl->nextFreeByte++ = V3D21_RETURN_FROM_SUB_LIST_opcode;
}
|
||||
|
||||
/*
 * Writes the STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER opcode at the
 * write cursor and advances the cursor by one.
 */
void clInsertStoreMultiSampleResolvedTileColorBuffer(ControlList* cl)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    *cl->nextFreeByte++ = V3D21_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_opcode;
}
|
||||
|
||||
/*
 * Writes the STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_AND_EOF opcode at
 * the write cursor and advances the cursor by one.
 */
void clInsertStoreMultiSampleResolvedTileColorBufferAndEOF(ControlList* cl)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    *cl->nextFreeByte++ = V3D21_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_AND_EOF_opcode;
}
|
||||
|
||||
/*
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertStoreFullResolutionTileBuffer(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t lastTile, //0/1
|
||||
uint32_t disableClearOnWrite, //0/1
|
||||
uint32_t disableZStencilBufferWrite, //0/1
|
||||
uint32_t disableColorBufferWrite) //0/1
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_STORE_FULL_RESOLUTION_TILE_BUFFER_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte =
|
||||
moveBits(disableColorBufferWrite, 1, 0) |
|
||||
moveBits(disableZStencilBufferWrite, 1, 1) |
|
||||
moveBits(disableClearOnWrite, 1, 2) |
|
||||
moveBits(lastTile, 1, 3) |
|
||||
moveBits(address.offset, 28, 4);
|
||||
cls->nextFreeByte += 4;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertReLoadFullResolutionTileBuffer(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t disableZStencilBufferRead, //0/1
|
||||
uint32_t disableColorBufferRead) //0/1
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_RE_LOAD_FULL_RESOLUTION_TILE_BUFFER_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte =
|
||||
moveBits(disableColorBufferRead, 1, 0) |
|
||||
moveBits(disableZStencilBufferRead, 1, 1) |
|
||||
moveBits(address.offset, 28, 4);
|
||||
cls->nextFreeByte += 4;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertStoreTileBufferGeneral(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t lastTileOfFrame, //0/1
|
||||
uint32_t disableZStencilBufferDump, //0/1
|
||||
uint32_t disableColorBufferDump, //0/1
|
||||
uint32_t disableZStencilBufferClearOnStoreDump, //0/1
|
||||
uint32_t disableColorBufferClearOnStoreDump, //0/1
|
||||
uint32_t disableDoubleBufferSwap, //0/1
|
||||
uint32_t pixelColorFormat, //0/1/2 RGBA8/BGR565dither/BGR565nodither
|
||||
uint32_t mode, //0/1/2 sample0/decimate4x/decimate16x
|
||||
uint32_t format, //0/1/2 raster/t/lt
|
||||
uint32_t bufferToStore) //0/1/2/3/5 none/color/zstencil/z/full
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_STORE_TILE_BUFFER_GENERAL_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
*cls->nextFreeByte =
|
||||
moveBits(bufferToStore, 3, 0) |
|
||||
moveBits(format, 2, 4) |
|
||||
moveBits(mode, 2, 6);
|
||||
cls->nextFreeByte++;
|
||||
*cls->nextFreeByte =
|
||||
moveBits(pixelColorFormat, 2, 0) |
|
||||
moveBits(disableDoubleBufferSwap, 1, 4) |
|
||||
moveBits(disableColorBufferClearOnStoreDump, 1, 5) |
|
||||
moveBits(disableZStencilBufferClearOnStoreDump, 1, 6) |
|
||||
moveBits(1, 1, 7); //disable vg mask
|
||||
cls->nextFreeByte++;
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte =
|
||||
moveBits(disableColorBufferDump, 1, 0) |
|
||||
moveBits(disableZStencilBufferDump, 1, 1) |
|
||||
moveBits(1, 1, 2) | //disable vg mask
|
||||
moveBits(lastTileOfFrame, 1, 3) |
|
||||
moveBits(address.offset, 28, 4);
|
||||
cls->nextFreeByte += 4;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertLoadTileBufferGeneral(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t disableZStencilBufferLoad, //0/1
|
||||
uint32_t disableColorBufferLoad, //0/1
|
||||
uint32_t pixelColorFormat, //0/1/2 RGBA8/BGR565dither/BGR565nodither
|
||||
uint32_t mode, //0/1/2 sample0/decimate4x/decimate16x
|
||||
uint32_t format, //0/1/2 raster/t/lt
|
||||
uint32_t bufferToLoad) //0/1/2/3/5 none/color/zstencil/z/full
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_LOAD_TILE_BUFFER_GENERAL_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
*cls->nextFreeByte =
|
||||
moveBits(bufferToLoad, 3, 0) |
|
||||
moveBits(format, 2, 4);
|
||||
cls->nextFreeByte++;
|
||||
*cls->nextFreeByte =
|
||||
moveBits(pixelColorFormat, 2, 0);
|
||||
cls->nextFreeByte++;
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte =
|
||||
moveBits(disableColorBufferLoad, 1, 0) |
|
||||
moveBits(disableZStencilBufferLoad, 1, 1) |
|
||||
moveBits(1, 1, 2) | //disable vg mask
|
||||
moveBits(address.offset, 28, 4);
|
||||
cls->nextFreeByte += 4;
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
/*
 * Emits an INDEXED_PRIMITIVE_LIST command: opcode byte, one byte holding the
 * index type (upper 4 bits) and primitive mode (lower 4 bits), then length,
 * indicesAddress and maxIndex as 32-bit words, in that order.
 */
void clInsertIndexedPrimitiveList(ControlList* cl,
                                  uint32_t maxIndex,
                                  uint32_t indicesAddress,
                                  uint32_t length,
                                  uint32_t indexType, //0/1: 8 or 16 bit
                                  enum V3D21_Primitive primitiveMode)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_INDEXED_PRIMITIVE_LIST_opcode; cl->nextFreeByte++;
    *cl->nextFreeByte = moveBits(indexType, 4, 4) | moveBits(primitiveMode, 4, 0); cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned here, so the words are stored
    //with memcpy instead of through a uint32_t pointer
    memcpy(cl->nextFreeByte, &length, sizeof length); cl->nextFreeByte += 4;
    memcpy(cl->nextFreeByte, &indicesAddress, sizeof indicesAddress); cl->nextFreeByte += 4;
    memcpy(cl->nextFreeByte, &maxIndex, sizeof maxIndex); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
 * Emits a VERTEX_ARRAY_PRIMITIVES command: opcode byte, primitive mode byte,
 * then length and firstVertexIndex as 32-bit words, in that order.
 */
void clInsertVertexArrayPrimitives(ControlList* cl,
                                   uint32_t firstVertexIndex,
                                   uint32_t length,
                                   enum V3D21_Primitive primitiveMode)
{
    assert(cl);
    assert(cl->buffer); //same precondition as the other emitters in this file
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_VERTEX_ARRAY_PRIMITIVES_opcode; cl->nextFreeByte++;
    *cl->nextFreeByte = moveBits(primitiveMode, 8, 0); cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned here, so the words are stored
    //with memcpy instead of through a uint32_t pointer
    memcpy(cl->nextFreeByte, &length, sizeof length); cl->nextFreeByte += 4;
    memcpy(cl->nextFreeByte, &firstVertexIndex, sizeof firstVertexIndex); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
 * Emits a PRIMITIVE_LIST_FORMAT command: opcode byte followed by one byte
 * holding the data type (upper 4 bits) and primitive type (lower 4 bits).
 */
void clInsertPrimitiveListFormat(ControlList* cl,
                                 uint32_t dataType, //1/3: 16 or 32 bit
                                 uint32_t primitiveType) //0/1/2/3: point/line/tri/rhy
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);

    const uint32_t format = moveBits(dataType, 4, 4) | moveBits(primitiveType, 4, 0);

    *cl->nextFreeByte++ = V3D21_PRIMITIVE_LIST_FORMAT_opcode;
    *cl->nextFreeByte++ = format;
}
|
||||
|
||||
/*
 * Emits a GL_SHADER_STATE command: opcode byte followed by one 32-bit word
 * built from moveBits(address, 28, 4), the extended-shader-record flag in
 * bit 3 and the number of attribute arrays in bits 0-2.
 */
void clInsertShaderState(ControlList* cl,
                         uint32_t address,
                         uint32_t extendedShaderRecord, //0/1: true/false
                         uint32_t numberOfAttributeArrays)
{
    assert(cl);
    assert(cl->buffer); //same precondition as the other emitters in this file
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_GL_SHADER_STATE_opcode; cl->nextFreeByte++;
    //TODO is this correct?
    const uint32_t word =
            moveBits(address, 28, 4) |
            moveBits(extendedShaderRecord, 1, 3) |
            moveBits(numberOfAttributeArrays, 3, 0);
    //the cursor is generally not 4-byte aligned after the opcode byte, so the
    //word is stored with memcpy instead of through a uint32_t pointer
    memcpy(cl->nextFreeByte, &word, sizeof word); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
|
||||
void clInsertClearColors(ControlList* cl,
|
||||
uint32_t clearStencil,
|
||||
uint32_t clearZ, //24 bit Z
|
||||
uint64_t clearColor) //2x RGBA8 or 1x RGBA16
|
||||
{
|
||||
assert(cl);
|
||||
assert(cl->buffer);
|
||||
assert(cl->nextFreeByte);
|
||||
*cl->nextFreeByte = V3D21_CLEAR_COLORS_opcode; cl->nextFreeByte++;
|
||||
*(uint64_t*)cl->nextFreeByte = clearColor; cl->nextFreeByte += 8;
|
||||
*(uint32_t*)cl->nextFreeByte = clearZ; cl->nextFreeByte += 4; //24 bits for Z, 8 bit for vg mask (unused)
|
||||
*cl->nextFreeByte = clearStencil; cl->nextFreeByte++;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
 * Emits a CONFIGURATION_BITS command: opcode byte followed by three payload
 * bytes (least significant first) that pack the flags below. The highest
 * field used is bit 17.
 *
 * The payload is written as exactly three bytes: the whole packet is 4 bytes
 * long, so advancing the cursor by a full 32-bit word would shift every
 * command that follows.
 */
void clInsertConfigurationBits(ControlList* cl,
                               uint32_t earlyZUpdatesEnable, //0/1
                               uint32_t earlyZEnable, //0/1
                               uint32_t zUpdatesEnable, //0/1
                               enum V3D21_Compare_Function depthTestFunction,
                               uint32_t coverageReadMode, //0/1 clear/leave as is
                               uint32_t coveragePipeSelect, //0/1
                               uint32_t coverageUpdateMode, //0/1/2/3 nonzero, odd, or, zero
                               uint32_t coverageReadType, //0/1 4*8bit, 16 bit mask
                               uint32_t rasterizerOversampleMode, //0/1/2 none, 4x, 16x
                               uint32_t enableDepthOffset, //0/1
                               uint32_t clockwisePrimitives, //0/1
                               uint32_t enableReverseFacingPrimitive, //0/1
                               uint32_t enableForwardFacingPrimitive) //0/1
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_CONFIGURATION_BITS_opcode; cl->nextFreeByte++;
    const uint32_t bits =
            moveBits(enableForwardFacingPrimitive, 1, 0) |
            moveBits(enableReverseFacingPrimitive, 1, 1) |
            moveBits(clockwisePrimitives, 1, 2) |
            moveBits(enableDepthOffset, 1, 3) |
            moveBits(coverageReadType, 1, 5) |
            moveBits(rasterizerOversampleMode, 2, 6) |
            moveBits(coveragePipeSelect, 1, 8) |
            moveBits(coverageUpdateMode, 2, 9) |
            moveBits(coverageReadMode, 1, 11) |
            moveBits(depthTestFunction, 3, 12) |
            moveBits(zUpdatesEnable, 1, 15) |
            moveBits(earlyZEnable, 1, 16) |
            moveBits(earlyZUpdatesEnable, 1, 17);
    //byte-wise stores: no alignment requirement and no fourth byte
    cl->nextFreeByte[0] = bits & 0xff;
    cl->nextFreeByte[1] = (bits >> 8) & 0xff;
    cl->nextFreeByte[2] = (bits >> 16) & 0xff;
    cl->nextFreeByte += 3;
}
|
||||
|
||||
/*
 * Emits a FLAT_SHADE_FLAGS command: opcode byte followed by `flags` as a
 * 32-bit word.
 */
void clInsertFlatShadeFlags(ControlList* cl,
                            uint32_t flags)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_FLAT_SHADE_FLAGS_opcode; cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned after the opcode byte, so the
    //word is stored with memcpy instead of through a uint32_t pointer
    memcpy(cl->nextFreeByte, &flags, sizeof flags); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
 * Emits a POINT_SIZE command: opcode byte followed by `size` as a 32-bit
 * float.
 */
void clInsertPointSize(ControlList* cl,
                       float size)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_POINT_SIZE_opcode; cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned after the opcode byte, so the
    //float is stored with memcpy instead of through a float pointer
    memcpy(cl->nextFreeByte, &size, sizeof size); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
 * Emits a LINE_WIDTH command: opcode byte followed by `width` as a 32-bit
 * float.
 */
void clInsertLineWidth(ControlList* cl,
                       float width)
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_LINE_WIDTH_opcode; cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned after the opcode byte, so the
    //float is stored with memcpy instead of through a float pointer
    memcpy(cl->nextFreeByte, &width, sizeof width); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
 * Emits an RHT_X_BOUNDARY command: opcode byte followed by the low 16 bits
 * of `boundary`.
 */
void clInsertRHTXBoundary(ControlList* cl,
                          uint32_t boundary) //sint16
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_RHT_X_BOUNDARY_opcode; cl->nextFreeByte++;
    //the cursor is generally not 2-byte aligned after the opcode byte, so the
    //value is stored with memcpy instead of through a uint16_t pointer
    const uint16_t value = moveBits(boundary, 16, 0);
    memcpy(cl->nextFreeByte, &value, sizeof value); cl->nextFreeByte += 2;
}
|
||||
|
||||
/*
 * Emits a DEPTH_OFFSET command: opcode byte followed by one 32-bit word with
 * `factor` in the low 16 bits and `units` in the high 16 bits.
 */
void clInsertDepthOffset(ControlList* cl,
                         uint32_t units, //float 187
                         uint32_t factor) //float 187
{
    assert(cl);
    assert(cl->buffer); //same precondition as the other emitters in this file
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_DEPTH_OFFSET_opcode; cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned after the opcode byte, so the
    //word is stored with memcpy instead of through a uint32_t pointer
    const uint32_t word = moveBits(factor, 16, 0) | moveBits(units, 16, 16);
    memcpy(cl->nextFreeByte, &word, sizeof word); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
 * Emits a CLIP_WINDOW command: opcode byte followed by two 32-bit words.
 * The first holds leftPixelCoord (low 16 bits) and bottomPixelCoord (high 16
 * bits), the second holds width (low 16 bits) and height (high 16 bits).
 */
void clInsertClipWindow(ControlList* cl,
                        uint32_t width, //uint16
                        uint32_t height, //uint16
                        uint32_t bottomPixelCoord, //uint16
                        uint32_t leftPixelCoord) //uint16
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_CLIP_WINDOW_opcode; cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned after the opcode byte, so the
    //words are stored with memcpy instead of through a uint32_t pointer
    const uint32_t origin = moveBits(leftPixelCoord, 16, 0) | moveBits(bottomPixelCoord, 16, 16);
    const uint32_t extent = moveBits(width, 16, 0) | moveBits(height, 16, 16);
    memcpy(cl->nextFreeByte, &origin, sizeof origin); cl->nextFreeByte += 4;
    memcpy(cl->nextFreeByte, &extent, sizeof extent); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
 * Emits a VIEWPORT_OFFSET command: opcode byte followed by one 32-bit word
 * with `x` in the low 16 bits and `y` in the high 16 bits.
 */
void clInsertViewPortOffset(ControlList* cl,
                            uint32_t x, //sint16
                            uint32_t y //sint16
                            )
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_VIEWPORT_OFFSET_opcode; cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned after the opcode byte, so the
    //word is stored with memcpy instead of through a uint32_t pointer
    const uint32_t word = moveBits(x, 16, 0) | moveBits(y, 16, 16);
    memcpy(cl->nextFreeByte, &word, sizeof word); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
 * Emits a Z_MIN_AND_MAX_CLIPPING_PLANES command: opcode byte followed by
 * minZw and maxZw as 32-bit floats, in that order.
 */
void clInsertZMinMaxClippingPlanes(ControlList* cl,
                                   float minZw,
                                   float maxZw
                                   )
{
    assert(cl);
    assert(cl->buffer); //same precondition as the other emitters in this file
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_Z_MIN_AND_MAX_CLIPPING_PLANES_opcode; cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned after the opcode byte, so the
    //floats are stored with memcpy instead of through a float pointer
    memcpy(cl->nextFreeByte, &minZw, sizeof minZw); cl->nextFreeByte += 4;
    memcpy(cl->nextFreeByte, &maxZw, sizeof maxZw); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
 * Emits a CLIPPER_XY_SCALING command: opcode byte followed by `width` and
 * `height` as 32-bit floats, in that order.
 */
void clInsertClipperXYScaling(ControlList* cl,
                              float width, //half width in 1/16 of pixel
                              float height //half height in 1/16 of pixel
                              )
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_CLIPPER_XY_SCALING_opcode; cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned after the opcode byte, so the
    //floats are stored with memcpy instead of through a float pointer
    memcpy(cl->nextFreeByte, &width, sizeof width); cl->nextFreeByte += 4;
    memcpy(cl->nextFreeByte, &height, sizeof height); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
 * Emits a CLIPPER_Z_SCALE_AND_OFFSET command: opcode byte followed by zScale
 * and then zOffset as 32-bit floats. Note that the emission order is the
 * reverse of the parameter order.
 */
void clInsertClipperZScaleOffset(ControlList* cl,
                                 float zOffset, //zc to zs
                                 float zScale //zc to zs
                                 )
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_CLIPPER_Z_SCALE_AND_OFFSET_opcode; cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned after the opcode byte, so the
    //floats are stored with memcpy instead of through a float pointer
    memcpy(cl->nextFreeByte, &zScale, sizeof zScale); cl->nextFreeByte += 4;
    memcpy(cl->nextFreeByte, &zOffset, sizeof zOffset); cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
 * Emits a TILE_BINNING_MODE_CONFIGURATION command:
 *   opcode byte,
 *   tileAllocationMemoryAddress, tileAllocationMemorySize and
 *   tileStateDataArrayAddress as 32-bit words,
 *   width and height of the frame in tiles (one byte each),
 *   one byte of packed mode flags.
 *
 * The tile size starts at 64x64 pixels; both dimensions are halved when
 * multisampleMode4x is set and the height is halved again when
 * tileBuffer64BitColorDepth is set. The frame size in tiles is the pixel
 * size divided by the tile size, rounded up.
 */
void clInsertTileBinningModeConfiguration(ControlList* cl,
                                          uint32_t doubleBufferInNonMsMode, //0/1
                                          uint32_t tileAllocationBlockSize, //0/1/2/3 32/64/128/256 bytes
                                          uint32_t tileAllocationInitialBlockSize, //0/1/2/3 32/64/128/256 bytes
                                          uint32_t autoInitializeTileStateDataArray, //0/1
                                          uint32_t tileBuffer64BitColorDepth, //0/1
                                          uint32_t multisampleMode4x, //0/1
                                          uint32_t widthInPixels,
                                          uint32_t heightInPixels,
                                          uint32_t tileStateDataArrayAddress, //16 byte aligned, size of 48 bytes * num tiles
                                          uint32_t tileAllocationMemorySize,
                                          uint32_t tileAllocationMemoryAddress
                                          )
{
    assert(cl);
    assert(cl->buffer);
    assert(cl->nextFreeByte);
    *cl->nextFreeByte = V3D21_TILE_BINNING_MODE_CONFIGURATION_opcode; cl->nextFreeByte++;
    //the cursor is generally not 4-byte aligned after the opcode byte, so the
    //words are stored with memcpy instead of through a uint32_t pointer
    memcpy(cl->nextFreeByte, &tileAllocationMemoryAddress, sizeof tileAllocationMemoryAddress); cl->nextFreeByte += 4;
    memcpy(cl->nextFreeByte, &tileAllocationMemorySize, sizeof tileAllocationMemorySize); cl->nextFreeByte += 4;
    memcpy(cl->nextFreeByte, &tileStateDataArrayAddress, sizeof tileStateDataArrayAddress); cl->nextFreeByte += 4;
    uint32_t tileSizeW = 64;
    uint32_t tileSizeH = 64;

    if(multisampleMode4x)
    {
        tileSizeW >>= 1;
        tileSizeH >>= 1;
    }

    if(tileBuffer64BitColorDepth)
    {
        tileSizeH >>= 1;
    }

    uint32_t widthInTiles = divRoundUp(widthInPixels, tileSizeW);
    uint32_t heightInTiles = divRoundUp(heightInPixels, tileSizeH);
    //only the low 8 bits of each tile count are stored
    *(uint8_t*)cl->nextFreeByte = widthInTiles; cl->nextFreeByte++;
    *(uint8_t*)cl->nextFreeByte = heightInTiles; cl->nextFreeByte++;
    *cl->nextFreeByte =
            moveBits(multisampleMode4x, 1, 0) |
            moveBits(tileBuffer64BitColorDepth, 1, 1) |
            moveBits(autoInitializeTileStateDataArray, 1, 2) |
            moveBits(tileAllocationInitialBlockSize, 2, 3) |
            moveBits(tileAllocationBlockSize, 2, 5) |
            moveBits(doubleBufferInNonMsMode, 1, 7); cl->nextFreeByte++;
}
|
||||
|
||||
/*
|
||||
void clInsertTileRenderingModeConfiguration(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t doubleBufferInNonMsMode, //0/1
|
||||
uint32_t earlyZEarlyCovDisable, //0/1
|
||||
uint32_t earlyZUpdateDirection, //0/1 lt,le/gt,ge
|
||||
uint32_t selectCoverageMode, //0/1
|
||||
uint32_t memoryFormat, //0/1/2 linear/t/lt
|
||||
uint32_t decimateMode, //0/1/2 0x/4x/16x
|
||||
uint32_t nonHDRFrameFormatColorFormat, //0/1/2 bgr565dithered/rgba8/bgr565nodither
|
||||
uint32_t tileBufferHDRMode, //0/1
|
||||
uint32_t multisampleMode4x, //0/1
|
||||
uint32_t widthPixels,
|
||||
uint32_t heightPixels)
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_TILE_RENDERING_MODE_CONFIGURATION_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
|
||||
*(uint32_t*)cls->nextFreeByte = moveBits(widthPixels, 16, 0) | moveBits(heightPixels, 16, 16); cls->nextFreeByte += 4;
|
||||
*(uint16_t*)cls->nextFreeByte =
|
||||
moveBits(multisampleMode4x, 1, 0) |
|
||||
moveBits(tileBufferHDRMode, 1, 1) |
|
||||
moveBits(nonHDRFrameFormatColorFormat, 2, 2) |
|
||||
moveBits(decimateMode, 2, 4) |
|
||||
moveBits(memoryFormat, 2, 6) |
|
||||
moveBits(0, 1, 8) | //vg buffer enable
|
||||
moveBits(selectCoverageMode, 1, 9) |
|
||||
moveBits(earlyZUpdateDirection, 1, 10) |
|
||||
moveBits(earlyZEarlyCovDisable, 1, 11) |
|
||||
moveBits(doubleBufferInNonMsMode, 1, 12); cls->nextFreeByte += 2;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
void clInsertTileCoordinates(ControlList* cl,
|
||||
uint32_t tileColumnNumber, //int8
|
||||
uint32_t tileRowNumber) //int8
|
||||
{
|
||||
assert(cl);
|
||||
assert(cl->buffer);
|
||||
assert(cl->nextFreeByte);
|
||||
*cl->nextFreeByte = V3D21_TILE_COORDINATES_opcode; cl->nextFreeByte++;
|
||||
*(uint16_t*)cl->nextFreeByte = moveBits(tileColumnNumber, 8, 0) | moveBits(tileRowNumber, 8, 8); cl->nextFreeByte += 2;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
 * Appends a GEM_RELOCATIONS packet to cl: the opcode byte followed by
 * buffer0 and buffer1 as two consecutive 32-bit words.
 */
void clInsertGEMRelocations(ControlList* cl,
	uint32_t buffer0,
	uint32_t buffer1)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_GEM_RELOCATIONS_opcode;

	*(uint32_t*)cl->nextFreeByte = buffer0;
	cl->nextFreeByte += 4;

	*(uint32_t*)cl->nextFreeByte = buffer1;
	cl->nextFreeByte += 4;
}
|
||||
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertShaderRecord(ControlList* cls,
|
||||
uint32_t fragmentShaderIsSingleThreaded, //0/1
|
||||
uint32_t pointSizeIncludedInShadedVertexData, //0/1
|
||||
uint32_t enableClipping, //0/1
|
||||
uint32_t fragmentNumberOfUnusedUniforms,
|
||||
uint32_t fragmentNumberOfVaryings,
|
||||
uint32_t fragmentUniformsAddress,
|
||||
ControlListAddress fragmentCodeAddress,
|
||||
uint32_t vertexNumberOfUnusedUniforms,
|
||||
uint32_t vertexAttributeArraySelectBits,
|
||||
uint32_t vertexTotalAttributesSize,
|
||||
uint32_t vertexUniformsAddress,
|
||||
ControlListAddress vertexCodeAddress)
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
//TODO is this correct?
|
||||
*cls->nextFreeByte =
|
||||
moveBits(fragmentShaderIsSingleThreaded, 1, 0) |
|
||||
moveBits(pointSizeIncludedInShadedVertexData, 1, 1) |
|
||||
moveBits(enableClipping, 1, 2); cls->nextFreeByte++;
|
||||
*cls->nextFreeByte = 0; cls->nextFreeByte++;
|
||||
*(uint16_t*)cls->nextFreeByte = moveBits(fragmentNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2;
|
||||
*cls->nextFreeByte = fragmentNumberOfVaryings; cls->nextFreeByte++;
|
||||
clEmitShaderRelocation(cls, &fragmentCodeAddress);
|
||||
*(uint32_t*)cls->nextFreeByte = fragmentCodeAddress.offset; cls->nextFreeByte += 4;
|
||||
*(uint32_t*)cls->nextFreeByte = fragmentUniformsAddress; cls->nextFreeByte += 4;
|
||||
|
||||
*(uint16_t*)cls->nextFreeByte = moveBits(vertexNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2;
|
||||
*cls->nextFreeByte = vertexAttributeArraySelectBits; cls->nextFreeByte++;
|
||||
*cls->nextFreeByte = vertexTotalAttributesSize; cls->nextFreeByte++;
|
||||
clEmitShaderRelocation(cls, &vertexCodeAddress);
|
||||
*(uint32_t*)cls->nextFreeByte = moveBits(vertexCodeAddress.offset, 32, 0) | moveBits(vertexUniformsAddress, 32, 0); cls->nextFreeByte += 4; //???
|
||||
cls->nextFreeByte += 4;
|
||||
//skip coordinate shader stuff
|
||||
cls->nextFreeByte += 16;
|
||||
}
|
||||
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertAttributeRecord(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t sizeBytes,
|
||||
uint32_t stride,
|
||||
uint32_t vertexVPMOffset)
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
uint32_t sizeBytesMinusOne = sizeBytes - 1;
|
||||
//TODO is this correct?
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
|
||||
*cls->nextFreeByte = sizeBytesMinusOne; cls->nextFreeByte++;
|
||||
*cls->nextFreeByte = stride; cls->nextFreeByte++;
|
||||
*cls->nextFreeByte = vertexVPMOffset; cls->nextFreeByte++;
|
||||
cls->nextFreeByte++; //skip coordinate shader stuff
|
||||
}
|
||||
|
||||
/*
 * Returns the index of handle inside the handles list.
 *
 * The list is treated as a packed array of 32-bit handles. If the handle is
 * already stored its index is returned; otherwise it is appended at
 * nextFreeByte and the index of the new entry is returned.
 */
uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handle)
{
	//number of 4 byte handles currently stored
	uint32_t storedCount = clSize(handlesCl) / 4;
	const uint32_t* stored = (const uint32_t*)handlesCl->buffer;

	uint32_t idx = 0;
	while(idx < storedCount)
	{
		if(stored[idx] == handle)
		{
			//found
			return idx;
		}
		idx++;
	}

	//not present: write handle to handles cl, idx is now the new entry's index
	*(uint32_t*)handlesCl->nextFreeByte = handle;
	handlesCl->nextFreeByte += 4;

	return idx;
}
|
||||
|
||||
//input: 2 cls (cl + handles cl)
|
||||
/*
 * Writes the relocation index for address->handle into the command list.
 *
 * cls must point at two consecutive ControlLists: cls[0] is the command list
 * and cls[1] is the handles list. The handle is looked up in (or appended to)
 * the handles list and its index is stored as a 32-bit word at the command
 * list's nextFreeByte, which is then advanced by 4.
 *
 * Defined without `inline`: a plain `inline` definition in a .c file only
 * provides an external definition when a non-inline declaration is also
 * visible, so an ordinary external definition is the robust choice.
 */
void clEmitShaderRelocation(ControlList* cls, const ControlListAddress* address)
{
	assert(cls);
	assert(cls->buffer);
	assert(cls->nextFreeByte);
	assert(address);
	assert(address->handle);

	//search for handle in handles cl
	//if found insert handle index
	ControlList* cl = cls;
	ControlList* handlesCl = cls + 1;

	//store offset within handles in cl
	*(uint32_t*)cl->nextFreeByte = clGetHandleIndex(handlesCl, address->handle);
	cl->nextFreeByte += 4;
}
|
@ -21,7 +21,7 @@ typedef struct ControlList
|
||||
uint8_t* nextFreeByte; //pointer to the next available free byte
|
||||
} ControlList;
|
||||
|
||||
static inline void clEmitShaderRelocation(ControlList* cl, const ControlListAddress* address);
|
||||
void clEmitShaderRelocation(ControlList* cl, const ControlListAddress* address);
|
||||
|
||||
#define __gen_user_data struct ControlList
|
||||
#define __gen_address_type ControlListAddress
|
||||
@ -30,356 +30,40 @@ static inline void clEmitShaderRelocation(ControlList* cl, const ControlListAddr
|
||||
|
||||
#include "brcm/cle/v3d_packet_v21_pack.h"
|
||||
|
||||
/*
 * Returns n / d rounded up to the next integer.
 *
 * d must be non-zero. The quotient and remainder are combined instead of
 * computing (n + d - 1) / d, because that sum wraps around for large n.
 */
uint32_t divRoundUp(uint32_t n, uint32_t d)
{
	uint32_t quotient = n / d;

	if(n % d)
	{
		//a remainder means one more partial unit is needed
		quotient++;
	}

	return quotient;
}
|
||||
|
||||
//move bits to offset, mask rest to 0
|
||||
/*
 * Keeps the lowest `bits` bits of d and shifts them left by `offset`.
 * All other bits of the result are zero.
 *
 * The mask is built in unsigned arithmetic with bits >= 32 handled
 * explicitly: shifting the signed value ~0, or shifting by the full width
 * of the type, is undefined behaviour in C. An offset of 32 or more moves
 * every bit out of the word, so the result is 0.
 */
uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset)
{
	if(offset >= 32)
	{
		return 0;
	}

	uint32_t mask = ~(uint32_t)0;

	if(bits < 32)
	{
		mask = ((uint32_t)1 << bits) - 1;
	}

	return (d & mask) << offset;
}
|
||||
|
||||
/*
 * Returns the number of bytes written to cl so far, i.e. the distance
 * between the start of the buffer and nextFreeByte.
 */
uint32_t clSize(ControlList* cl)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	uint32_t usedBytes = cl->nextFreeByte - cl->buffer;
	return usedBytes;
}
|
||||
|
||||
/*
 * Returns 1 when `size` more bytes keep the list strictly below
 * CONTROL_LIST_SIZE, and 0 when the list needs to be reallocated.
 */
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size)
{
	uint32_t sizeAfterWrite = clSize(cl) + size;

	//1: fits, 0: need to reallocate
	return (sizeAfterWrite < CONTROL_LIST_SIZE) ? 1 : 0;
}
|
||||
|
||||
/*
 * Initialises cl to use `buffer` as its storage: the block count is set to
 * one and nextFreeByte points at the first byte of the buffer.
 */
void clInit(ControlList* cl, void* buffer)
{
	assert(cl);
	assert(buffer);

	cl->numBlocks = 1;
	cl->buffer = buffer;

	//nothing written yet, so the next free byte is the start of the buffer
	cl->nextFreeByte = cl->buffer;
}
|
||||
|
||||
/* Appends the single-byte HALT opcode to cl. */
void clInsertHalt(ControlList* cl)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	//write the opcode and step past it
	*cl->nextFreeByte++ = V3D21_HALT_opcode;
}
|
||||
|
||||
/* Appends the single-byte NOP opcode to cl. */
void clInsertNop(ControlList* cl)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	//write the opcode and step past it
	*cl->nextFreeByte++ = V3D21_NOP_opcode;
}
|
||||
|
||||
/* Appends the single-byte FLUSH opcode to cl. */
void clInsertFlush(ControlList* cl)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	//write the opcode and step past it
	*cl->nextFreeByte++ = V3D21_FLUSH_opcode;
}
|
||||
|
||||
/* Appends the single-byte FLUSH_ALL_STATE opcode to cl. */
void clInsertFlushAllState(ControlList* cl)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	//write the opcode and step past it
	*cl->nextFreeByte++ = V3D21_FLUSH_ALL_STATE_opcode;
}
|
||||
|
||||
/* Appends the single-byte START_TILE_BINNING opcode to cl. */
void clInsertStartTileBinning(ControlList* cl)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	//write the opcode and step past it
	*cl->nextFreeByte++ = V3D21_START_TILE_BINNING_opcode;
}
|
||||
|
||||
/* Appends the single-byte INCREMENT_SEMAPHORE opcode to cl. */
void clInsertIncrementSemaphore(ControlList* cl)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	//write the opcode and step past it
	*cl->nextFreeByte++ = V3D21_INCREMENT_SEMAPHORE_opcode;
}
|
||||
|
||||
/* Appends the single-byte WAIT_ON_SEMAPHORE opcode to cl. */
void clInsertWaitOnSemaphore(ControlList* cl)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	//write the opcode and step past it
	*cl->nextFreeByte++ = V3D21_WAIT_ON_SEMAPHORE_opcode;
}
|
||||
|
||||
//input: 2 cls (cl, handles cl)
|
||||
/*
 * Appends a BRANCH packet: the opcode byte, the 4 byte relocation index for
 * address.handle, then the 32-bit address.offset.
 * cls must point at two consecutive ControlLists (command list, handles list).
 */
void clInsertBranch(ControlList* cls, ControlListAddress address)
{
	assert(cls);
	assert(cls->buffer);
	assert(cls->nextFreeByte);

	*cls->nextFreeByte++ = V3D21_BRANCH_opcode;

	//TODO is this correct?
	clEmitShaderRelocation(cls, &address);

	*(uint32_t*)cls->nextFreeByte = address.offset;
	cls->nextFreeByte += 4;
}
|
||||
|
||||
//input: 2 cls (cl, handles cl)
|
||||
/*
 * Appends a BRANCH_TO_SUB_LIST packet: the opcode byte, the 4 byte
 * relocation index for address.handle, then the 32-bit address.offset.
 * cls must point at two consecutive ControlLists (command list, handles list).
 */
void clInsertBranchToSubList(ControlList* cls, ControlListAddress address)
{
	assert(cls);
	assert(cls->buffer);
	assert(cls->nextFreeByte);

	*cls->nextFreeByte++ = V3D21_BRANCH_TO_SUB_LIST_opcode;

	//TODO is this correct?
	clEmitShaderRelocation(cls, &address);

	*(uint32_t*)cls->nextFreeByte = address.offset;
	cls->nextFreeByte += 4;
}
|
||||
|
||||
/* Appends the single-byte RETURN_FROM_SUB_LIST opcode to cl. */
void clInsertReturnFromSubList(ControlList* cl)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	//write the opcode and step past it
	*cl->nextFreeByte++ = V3D21_RETURN_FROM_SUB_LIST_opcode;
}
|
||||
|
||||
/* Appends the single-byte STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER opcode to cl. */
void clInsertStoreMultiSampleResolvedTileColorBuffer(ControlList* cl)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	//write the opcode and step past it
	*cl->nextFreeByte++ = V3D21_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_opcode;
}
|
||||
|
||||
/* Appends the single-byte STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_AND_EOF opcode to cl. */
void clInsertStoreMultiSampleResolvedTileColorBufferAndEOF(ControlList* cl)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	//write the opcode and step past it
	*cl->nextFreeByte++ = V3D21_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_AND_EOF_opcode;
}
|
||||
|
||||
/*
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertStoreFullResolutionTileBuffer(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t lastTile, //0/1
|
||||
uint32_t disableClearOnWrite, //0/1
|
||||
uint32_t disableZStencilBufferWrite, //0/1
|
||||
uint32_t disableColorBufferWrite) //0/1
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_STORE_FULL_RESOLUTION_TILE_BUFFER_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte =
|
||||
moveBits(disableColorBufferWrite, 1, 0) |
|
||||
moveBits(disableZStencilBufferWrite, 1, 1) |
|
||||
moveBits(disableClearOnWrite, 1, 2) |
|
||||
moveBits(lastTile, 1, 3) |
|
||||
moveBits(address.offset, 28, 4);
|
||||
cls->nextFreeByte += 4;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertReLoadFullResolutionTileBuffer(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t disableZStencilBufferRead, //0/1
|
||||
uint32_t disableColorBufferRead) //0/1
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_RE_LOAD_FULL_RESOLUTION_TILE_BUFFER_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte =
|
||||
moveBits(disableColorBufferRead, 1, 0) |
|
||||
moveBits(disableZStencilBufferRead, 1, 1) |
|
||||
moveBits(address.offset, 28, 4);
|
||||
cls->nextFreeByte += 4;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertStoreTileBufferGeneral(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t lastTileOfFrame, //0/1
|
||||
uint32_t disableZStencilBufferDump, //0/1
|
||||
uint32_t disableColorBufferDump, //0/1
|
||||
uint32_t disableZStencilBufferClearOnStoreDump, //0/1
|
||||
uint32_t disableColorBufferClearOnStoreDump, //0/1
|
||||
uint32_t disableDoubleBufferSwap, //0/1
|
||||
uint32_t pixelColorFormat, //0/1/2 RGBA8/BGR565dither/BGR565nodither
|
||||
uint32_t mode, //0/1/2 sample0/decimate4x/decimate16x
|
||||
uint32_t format, //0/1/2 raster/t/lt
|
||||
uint32_t bufferToStore) //0/1/2/3/5 none/color/zstencil/z/full
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_STORE_TILE_BUFFER_GENERAL_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
*cls->nextFreeByte =
|
||||
moveBits(bufferToStore, 3, 0) |
|
||||
moveBits(format, 2, 4) |
|
||||
moveBits(mode, 2, 6);
|
||||
cls->nextFreeByte++;
|
||||
*cls->nextFreeByte =
|
||||
moveBits(pixelColorFormat, 2, 0) |
|
||||
moveBits(disableDoubleBufferSwap, 1, 4) |
|
||||
moveBits(disableColorBufferClearOnStoreDump, 1, 5) |
|
||||
moveBits(disableZStencilBufferClearOnStoreDump, 1, 6) |
|
||||
moveBits(1, 1, 7); //disable vg mask
|
||||
cls->nextFreeByte++;
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte =
|
||||
moveBits(disableColorBufferDump, 1, 0) |
|
||||
moveBits(disableZStencilBufferDump, 1, 1) |
|
||||
moveBits(1, 1, 2) | //disable vg mask
|
||||
moveBits(lastTileOfFrame, 1, 3) |
|
||||
moveBits(address.offset, 28, 4);
|
||||
cls->nextFreeByte += 4;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
//input: 2 cls (cl, handles cl)
|
||||
void clInsertLoadTileBufferGeneral(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t disableZStencilBufferLoad, //0/1
|
||||
uint32_t disableColorBufferLoad, //0/1
|
||||
uint32_t pixelColorFormat, //0/1/2 RGBA8/BGR565dither/BGR565nodither
|
||||
uint32_t mode, //0/1/2 sample0/decimate4x/decimate16x
|
||||
uint32_t format, //0/1/2 raster/t/lt
|
||||
uint32_t bufferToLoad) //0/1/2/3/5 none/color/zstencil/z/full
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_LOAD_TILE_BUFFER_GENERAL_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
*cls->nextFreeByte =
|
||||
moveBits(bufferToLoad, 3, 0) |
|
||||
moveBits(format, 2, 4);
|
||||
cls->nextFreeByte++;
|
||||
*cls->nextFreeByte =
|
||||
moveBits(pixelColorFormat, 2, 0);
|
||||
cls->nextFreeByte++;
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte =
|
||||
moveBits(disableColorBufferLoad, 1, 0) |
|
||||
moveBits(disableZStencilBufferLoad, 1, 1) |
|
||||
moveBits(1, 1, 2) | //disable vg mask
|
||||
moveBits(address.offset, 28, 4);
|
||||
cls->nextFreeByte += 4;
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
uint32_t divRoundUp(uint32_t n, uint32_t d);
|
||||
uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset);
|
||||
uint32_t clSize(ControlList* cl);
|
||||
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size);
|
||||
void clInit(ControlList* cl, void* buffer);
|
||||
void clInsertHalt(ControlList* cl);
|
||||
void clInsertNop(ControlList* cl);
|
||||
void clInsertFlush(ControlList* cl);
|
||||
void clInsertFlushAllState(ControlList* cl);
|
||||
void clInsertStartTileBinning(ControlList* cl);
|
||||
void clInsertIncrementSemaphore(ControlList* cl);
|
||||
void clInsertWaitOnSemaphore(ControlList* cl);
|
||||
void clInsertBranch(ControlList* cls, ControlListAddress address);
|
||||
void clInsertBranchToSubList(ControlList* cls, ControlListAddress address);
|
||||
void clInsertReturnFromSubList(ControlList* cl);
|
||||
void clInsertStoreMultiSampleResolvedTileColorBuffer(ControlList* cl);
|
||||
void clInsertStoreMultiSampleResolvedTileColorBufferAndEOF(ControlList* cl);
|
||||
/*
 * Appends an INDEXED_PRIMITIVE_LIST packet: the opcode byte, one byte with
 * the primitive mode in the low nibble and the index type in the high
 * nibble, then length, indicesAddress and maxIndex as 32-bit words.
 */
void clInsertIndexedPrimitiveList(ControlList* cl,
	uint32_t maxIndex,
	uint32_t indicesAddress,
	uint32_t length,
	uint32_t indexType, //0/1: 8 or 16 bit
	enum V3D21_Primitive primitiveMode)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_INDEXED_PRIMITIVE_LIST_opcode;

	*cl->nextFreeByte = moveBits(indexType, 4, 4) | moveBits(primitiveMode, 4, 0);
	cl->nextFreeByte++;

	*(uint32_t*)cl->nextFreeByte = length;
	cl->nextFreeByte += 4;

	*(uint32_t*)cl->nextFreeByte = indicesAddress;
	cl->nextFreeByte += 4;

	*(uint32_t*)cl->nextFreeByte = maxIndex;
	cl->nextFreeByte += 4;
}
|
||||
|
||||
enum V3D21_Primitive primitiveMode);
|
||||
/*
 * Appends a VERTEX_ARRAY_PRIMITIVES packet: the opcode byte, one byte with
 * the primitive mode, then length and firstVertexIndex as 32-bit words.
 */
void clInsertVertexArrayPrimitives(ControlList* cl,
	uint32_t firstVertexIndex,
	uint32_t length,
	enum V3D21_Primitive primitiveMode)
{
	assert(cl);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_VERTEX_ARRAY_PRIMITIVES_opcode;

	*cl->nextFreeByte = moveBits(primitiveMode, 8, 0);
	cl->nextFreeByte++;

	*(uint32_t*)cl->nextFreeByte = length;
	cl->nextFreeByte += 4;

	*(uint32_t*)cl->nextFreeByte = firstVertexIndex;
	cl->nextFreeByte += 4;
}
|
||||
|
||||
enum V3D21_Primitive primitiveMode);
|
||||
/*
 * Appends a PRIMITIVE_LIST_FORMAT packet: the opcode byte followed by one
 * byte with the primitive type in the low nibble and the data type in the
 * high nibble.
 */
void clInsertPrimitiveListFormat(ControlList* cl,
	uint32_t dataType, //1/3: 16 or 32 bit
	uint32_t primitiveType) //0/1/2/3: point/line/tri/rhy
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_PRIMITIVE_LIST_FORMAT_opcode;

	*cl->nextFreeByte = moveBits(dataType, 4, 4) | moveBits(primitiveType, 4, 0);
	cl->nextFreeByte++;
}
|
||||
|
||||
uint32_t primitiveType); //0/1/2/3: point/line/tri/rhy
|
||||
/*
 * Appends a GL_SHADER_STATE packet: the opcode byte followed by one 32-bit
 * word holding the number of attribute arrays in bits 0-2, the extended
 * shader record flag in bit 3 and the low 28 bits of address in bits 4-31.
 */
void clInsertShaderState(ControlList* cl,
	uint32_t address,
	uint32_t extendedShaderRecord, //0/1: true/false
	uint32_t numberOfAttributeArrays)
{
	assert(cl);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_GL_SHADER_STATE_opcode;

	//TODO is this correct?
	*(uint32_t*)cl->nextFreeByte =
		moveBits(address, 28, 4) |
		moveBits(extendedShaderRecord, 1, 3) |
		moveBits(numberOfAttributeArrays, 3, 0);
	cl->nextFreeByte += 4;
}
|
||||
|
||||
/*
|
||||
void clInsertClearColors(ControlList* cl,
|
||||
uint32_t clearStencil,
|
||||
uint32_t clearZ, //24 bit Z
|
||||
uint64_t clearColor) //2x RGBA8 or 1x RGBA16
|
||||
{
|
||||
assert(cl);
|
||||
assert(cl->buffer);
|
||||
assert(cl->nextFreeByte);
|
||||
*cl->nextFreeByte = V3D21_CLEAR_COLORS_opcode; cl->nextFreeByte++;
|
||||
*(uint64_t*)cl->nextFreeByte = clearColor; cl->nextFreeByte += 8;
|
||||
*(uint32_t*)cl->nextFreeByte = clearZ; cl->nextFreeByte += 4; //24 bits for Z, 8 bit for vg mask (unused)
|
||||
*cl->nextFreeByte = clearStencil; cl->nextFreeByte++;
|
||||
}
|
||||
*/
|
||||
|
||||
uint32_t numberOfAttributeArrays);
|
||||
void clInsertConfigurationBits(ControlList* cl,
|
||||
uint32_t earlyZUpdatesEnable, //0/1
|
||||
uint32_t earlyZEnable, //0/1
|
||||
@ -393,142 +77,39 @@ void clInsertConfigurationBits(ControlList* cl,
|
||||
uint32_t enableDepthOffset, //0/1
|
||||
uint32_t clockwisePrimitives, //0/1
|
||||
uint32_t enableReverseFacingPrimitive, //0/1
|
||||
uint32_t enableForwardFacingPrimitive) //0/1
|
||||
{
|
||||
assert(cl);
|
||||
assert(cl->buffer);
|
||||
assert(cl->nextFreeByte);
|
||||
*cl->nextFreeByte = V3D21_CONFIGURATION_BITS_opcode; cl->nextFreeByte++;
|
||||
*(uint32_t*)cl->nextFreeByte =
|
||||
moveBits(enableForwardFacingPrimitive, 1, 0) |
|
||||
moveBits(enableReverseFacingPrimitive, 1, 1) |
|
||||
moveBits(clockwisePrimitives, 1, 2) |
|
||||
moveBits(enableDepthOffset, 1, 3) |
|
||||
moveBits(coverageReadType, 1, 5) |
|
||||
moveBits(rasterizerOversampleMode, 2, 6) |
|
||||
moveBits(coveragePipeSelect, 1, 8) |
|
||||
moveBits(coverageUpdateMode, 2, 9) |
|
||||
moveBits(coverageReadMode, 1, 11) |
|
||||
moveBits(depthTestFunction, 3, 12) |
|
||||
moveBits(zUpdatesEnable, 1, 15) |
|
||||
moveBits(earlyZEnable, 1, 16) |
|
||||
moveBits(earlyZUpdatesEnable, 1, 17); cl->nextFreeByte += 4;
|
||||
}
|
||||
|
||||
uint32_t enableForwardFacingPrimitive); //0/1
|
||||
/* Appends a FLAT_SHADE_FLAGS packet: the opcode byte followed by flags as a 32-bit word. */
void clInsertFlatShadeFlags(ControlList* cl,
	uint32_t flags)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_FLAT_SHADE_FLAGS_opcode;

	*(uint32_t*)cl->nextFreeByte = flags;
	cl->nextFreeByte += 4;
}
|
||||
|
||||
uint32_t flags);
|
||||
/* Appends a POINT_SIZE packet: the opcode byte followed by size as a 4 byte float. */
void clInsertPointSize(ControlList* cl,
	float size)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_POINT_SIZE_opcode;

	*(float*)cl->nextFreeByte = size;
	cl->nextFreeByte += 4;
}
|
||||
|
||||
float size);
|
||||
/* Appends a LINE_WIDTH packet: the opcode byte followed by width as a 4 byte float. */
void clInsertLineWidth(ControlList* cl,
	float width)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_LINE_WIDTH_opcode;

	*(float*)cl->nextFreeByte = width;
	cl->nextFreeByte += 4;
}
|
||||
|
||||
float width);
|
||||
/*
 * Appends an RHT_X_BOUNDARY packet: the opcode byte followed by the low
 * 16 bits of boundary.
 */
void clInsertRHTXBoundary(ControlList* cl,
	uint32_t boundary) //sint16
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_RHT_X_BOUNDARY_opcode;

	*(uint16_t*)cl->nextFreeByte = moveBits(boundary, 16, 0);
	cl->nextFreeByte += 2;
}
|
||||
|
||||
uint32_t boundary); //sint16
|
||||
/*
 * Appends a DEPTH_OFFSET packet: the opcode byte followed by one 32-bit
 * word with factor in the low 16 bits and units in the high 16 bits.
 */
void clInsertDepthOffset(ControlList* cl,
	uint32_t units, //float 187
	uint32_t factor) //float 187
{
	assert(cl);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_DEPTH_OFFSET_opcode;

	*(uint32_t*)cl->nextFreeByte = moveBits(factor, 16, 0) | moveBits(units, 16, 16);
	cl->nextFreeByte += 4;
}
|
||||
|
||||
uint32_t factor); //float 187
|
||||
/*
 * Appends a CLIP_WINDOW packet: the opcode byte and two 32-bit words.
 * The first word packs leftPixelCoord (low 16 bits) and bottomPixelCoord
 * (high 16 bits); the second packs width (low) and height (high).
 */
void clInsertClipWindow(ControlList* cl,
	uint32_t width, //uint16
	uint32_t height, //uint16
	uint32_t bottomPixelCoord, //uint16
	uint32_t leftPixelCoord) //uint16
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_CLIP_WINDOW_opcode;

	*(uint32_t*)cl->nextFreeByte = moveBits(leftPixelCoord, 16, 0) | moveBits(bottomPixelCoord, 16, 16);
	cl->nextFreeByte += 4;

	*(uint32_t*)cl->nextFreeByte = moveBits(width, 16, 0) | moveBits(height, 16, 16);
	cl->nextFreeByte += 4;
}
|
||||
|
||||
uint32_t leftPixelCoord); //uint16
|
||||
/*
 * Appends a VIEWPORT_OFFSET packet: the opcode byte followed by one 32-bit
 * word with x in the low 16 bits and y in the high 16 bits.
 */
void clInsertViewPortOffset(ControlList* cl,
	uint32_t x, //sint16
	uint32_t y //sint16
	)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_VIEWPORT_OFFSET_opcode;

	*(uint32_t*)cl->nextFreeByte = moveBits(x, 16, 0) | moveBits(y, 16, 16);
	cl->nextFreeByte += 4;
}
|
||||
|
||||
);
|
||||
/*
 * Appends a Z_MIN_AND_MAX_CLIPPING_PLANES packet: the opcode byte followed
 * by minZw and maxZw as two 4 byte floats.
 */
void clInsertZMinMaxClippingPlanes(ControlList* cl,
	float minZw,
	float maxZw
	)
{
	assert(cl);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_Z_MIN_AND_MAX_CLIPPING_PLANES_opcode;

	*(float*)cl->nextFreeByte = minZw;
	cl->nextFreeByte += 4;

	*(float*)cl->nextFreeByte = maxZw;
	cl->nextFreeByte += 4;
}
|
||||
|
||||
);
|
||||
/*
 * Appends a CLIPPER_XY_SCALING packet: the opcode byte followed by width
 * and height as two 4 byte floats, in that order.
 *
 * NOTE(review): the parameter comments label width as "half height" and
 * height as "half width"; presumably the two comments are swapped. Confirm
 * against the hardware packet definition.
 */
void clInsertClipperXYScaling(ControlList* cl,
	float width, //half height in 1/16 of pixel
	float height //half width in 1/16 of pixel
	)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_CLIPPER_XY_SCALING_opcode;

	*(float*)cl->nextFreeByte = width;
	cl->nextFreeByte += 4;

	*(float*)cl->nextFreeByte = height;
	cl->nextFreeByte += 4;
}
|
||||
|
||||
);
|
||||
/*
 * Appends a CLIPPER_Z_SCALE_AND_OFFSET packet: the opcode byte followed by
 * two 4 byte floats. The scale is written first and the offset second,
 * which is the reverse of the parameter order.
 */
void clInsertClipperZScaleOffset(ControlList* cl,
	float zOffset, //zc to zs
	float zScale //zc to zs
	)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_CLIPPER_Z_SCALE_AND_OFFSET_opcode;

	*(float*)cl->nextFreeByte = zScale;
	cl->nextFreeByte += 4;

	*(float*)cl->nextFreeByte = zOffset;
	cl->nextFreeByte += 4;
}
|
||||
|
||||
);
|
||||
void clInsertTileBinningModeConfiguration(ControlList* cl,
|
||||
uint32_t doubleBufferInNonMsMode, //0/1
|
||||
uint32_t tileAllocationBlockSize, //0/1/2/3 32/64/128/256 bytes
|
||||
@ -541,105 +122,10 @@ void clInsertTileBinningModeConfiguration(ControlList* cl,
|
||||
uint32_t tileStateDataArrayAddress, //16 byte aligned, size of 48 bytes * num tiles
|
||||
uint32_t tileAllocationMemorySize,
|
||||
uint32_t tileAllocationMemoryAddress
|
||||
)
|
||||
{
|
||||
assert(cl);
|
||||
assert(cl->buffer);
|
||||
assert(cl->nextFreeByte);
|
||||
*cl->nextFreeByte = V3D21_TILE_BINNING_MODE_CONFIGURATION_opcode; cl->nextFreeByte++;
|
||||
*(uint32_t*)cl->nextFreeByte = tileAllocationMemoryAddress; cl->nextFreeByte += 4;
|
||||
*(uint32_t*)cl->nextFreeByte = tileAllocationMemorySize; cl->nextFreeByte += 4;
|
||||
*(uint32_t*)cl->nextFreeByte = tileStateDataArrayAddress; cl->nextFreeByte += 4;
|
||||
uint32_t tileSizeW = 64;
|
||||
uint32_t tileSizeH = 64;
|
||||
|
||||
if(multisampleMode4x)
|
||||
{
|
||||
tileSizeW >>= 1;
|
||||
tileSizeH >>= 1;
|
||||
}
|
||||
|
||||
if(tileBuffer64BitColorDepth)
|
||||
{
|
||||
tileSizeH >>= 1;
|
||||
}
|
||||
|
||||
uint32_t widthInTiles = divRoundUp(widthInPixels, tileSizeW);
|
||||
uint32_t heightInTiles = divRoundUp(heightInPixels, tileSizeH);
|
||||
*(uint8_t*)cl->nextFreeByte = widthInTiles; cl->nextFreeByte++;
|
||||
*(uint8_t*)cl->nextFreeByte = heightInTiles; cl->nextFreeByte++;
|
||||
*cl->nextFreeByte =
|
||||
moveBits(multisampleMode4x, 1, 0) |
|
||||
moveBits(tileBuffer64BitColorDepth, 1, 1) |
|
||||
moveBits(autoInitializeTileStateDataArray, 1, 2) |
|
||||
moveBits(tileAllocationInitialBlockSize, 2, 3) |
|
||||
moveBits(tileAllocationBlockSize, 2, 5) |
|
||||
moveBits(doubleBufferInNonMsMode, 1, 7); cl->nextFreeByte++;
|
||||
}
|
||||
|
||||
/*
|
||||
void clInsertTileRenderingModeConfiguration(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t doubleBufferInNonMsMode, //0/1
|
||||
uint32_t earlyZEarlyCovDisable, //0/1
|
||||
uint32_t earlyZUpdateDirection, //0/1 lt,le/gt,ge
|
||||
uint32_t selectCoverageMode, //0/1
|
||||
uint32_t memoryFormat, //0/1/2 linear/t/lt
|
||||
uint32_t decimateMode, //0/1/2 0x/4x/16x
|
||||
uint32_t nonHDRFrameFormatColorFormat, //0/1/2 bgr565dithered/rgba8/bgr565nodither
|
||||
uint32_t tileBufferHDRMode, //0/1
|
||||
uint32_t multisampleMode4x, //0/1
|
||||
uint32_t widthPixels,
|
||||
uint32_t heightPixels)
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
*cls->nextFreeByte = V3D21_TILE_RENDERING_MODE_CONFIGURATION_opcode; cls->nextFreeByte++;
|
||||
//TODO is this correct?
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
|
||||
*(uint32_t*)cls->nextFreeByte = moveBits(widthPixels, 16, 0) | moveBits(heightPixels, 16, 16); cls->nextFreeByte += 4;
|
||||
*(uint16_t*)cls->nextFreeByte =
|
||||
moveBits(multisampleMode4x, 1, 0) |
|
||||
moveBits(tileBufferHDRMode, 1, 1) |
|
||||
moveBits(nonHDRFrameFormatColorFormat, 2, 2) |
|
||||
moveBits(decimateMode, 2, 4) |
|
||||
moveBits(memoryFormat, 2, 6) |
|
||||
moveBits(0, 1, 8) | //vg buffer enable
|
||||
moveBits(selectCoverageMode, 1, 9) |
|
||||
moveBits(earlyZUpdateDirection, 1, 10) |
|
||||
moveBits(earlyZEarlyCovDisable, 1, 11) |
|
||||
moveBits(doubleBufferInNonMsMode, 1, 12); cls->nextFreeByte += 2;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
void clInsertTileCoordinates(ControlList* cl,
|
||||
uint32_t tileColumnNumber, //int8
|
||||
uint32_t tileRowNumber) //int8
|
||||
{
|
||||
assert(cl);
|
||||
assert(cl->buffer);
|
||||
assert(cl->nextFreeByte);
|
||||
*cl->nextFreeByte = V3D21_TILE_COORDINATES_opcode; cl->nextFreeByte++;
|
||||
*(uint16_t*)cl->nextFreeByte = moveBits(tileColumnNumber, 8, 0) | moveBits(tileRowNumber, 8, 8); cl->nextFreeByte += 2;
|
||||
}
|
||||
*/
|
||||
|
||||
);
|
||||
/*
 * Appends a GEM_RELOCATIONS packet to cl: the opcode byte followed by
 * buffer0 and buffer1 as two consecutive 32-bit words.
 */
void clInsertGEMRelocations(ControlList* cl,
	uint32_t buffer0,
	uint32_t buffer1)
{
	assert(cl);
	assert(cl->buffer);
	assert(cl->nextFreeByte);

	*cl->nextFreeByte++ = V3D21_GEM_RELOCATIONS_opcode;

	*(uint32_t*)cl->nextFreeByte = buffer0;
	cl->nextFreeByte += 4;

	*(uint32_t*)cl->nextFreeByte = buffer1;
	cl->nextFreeByte += 4;
}
|
||||
|
||||
//input: 2 cls (cl, handles cl)
|
||||
uint32_t buffer1);
|
||||
void clInsertShaderRecord(ControlList* cls,
|
||||
uint32_t fragmentShaderIsSingleThreaded, //0/1
|
||||
uint32_t pointSizeIncludedInShadedVertexData, //0/1
|
||||
@ -652,94 +138,13 @@ void clInsertShaderRecord(ControlList* cls,
|
||||
uint32_t vertexAttributeArraySelectBits,
|
||||
uint32_t vertexTotalAttributesSize,
|
||||
uint32_t vertexUniformsAddress,
|
||||
ControlListAddress vertexCodeAddress)
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
//TODO is this correct?
|
||||
*cls->nextFreeByte =
|
||||
moveBits(fragmentShaderIsSingleThreaded, 1, 0) |
|
||||
moveBits(pointSizeIncludedInShadedVertexData, 1, 1) |
|
||||
moveBits(enableClipping, 1, 2); cls->nextFreeByte++;
|
||||
*cls->nextFreeByte = 0; cls->nextFreeByte++;
|
||||
*(uint16_t*)cls->nextFreeByte = moveBits(fragmentNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2;
|
||||
*cls->nextFreeByte = fragmentNumberOfVaryings; cls->nextFreeByte++;
|
||||
clEmitShaderRelocation(cls, &fragmentCodeAddress);
|
||||
*(uint32_t*)cls->nextFreeByte = fragmentCodeAddress.offset; cls->nextFreeByte += 4;
|
||||
*(uint32_t*)cls->nextFreeByte = fragmentUniformsAddress; cls->nextFreeByte += 4;
|
||||
|
||||
*(uint16_t*)cls->nextFreeByte = moveBits(vertexNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2;
|
||||
*cls->nextFreeByte = vertexAttributeArraySelectBits; cls->nextFreeByte++;
|
||||
*cls->nextFreeByte = vertexTotalAttributesSize; cls->nextFreeByte++;
|
||||
clEmitShaderRelocation(cls, &vertexCodeAddress);
|
||||
*(uint32_t*)cls->nextFreeByte = moveBits(vertexCodeAddress.offset, 32, 0) | moveBits(vertexUniformsAddress, 32, 0); cls->nextFreeByte += 4; //???
|
||||
cls->nextFreeByte += 4;
|
||||
//skip coordinate shader stuff
|
||||
cls->nextFreeByte += 16;
|
||||
}
|
||||
|
||||
//input: 2 cls (cl, handles cl)
|
||||
ControlListAddress vertexCodeAddress);
|
||||
void clInsertAttributeRecord(ControlList* cls,
|
||||
ControlListAddress address,
|
||||
uint32_t sizeBytes,
|
||||
uint32_t stride,
|
||||
uint32_t vertexVPMOffset)
|
||||
{
|
||||
assert(cls);
|
||||
assert(cls->buffer);
|
||||
assert(cls->nextFreeByte);
|
||||
uint32_t sizeBytesMinusOne = sizeBytes - 1;
|
||||
//TODO is this correct?
|
||||
clEmitShaderRelocation(cls, &address);
|
||||
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
|
||||
*cls->nextFreeByte = sizeBytesMinusOne; cls->nextFreeByte++;
|
||||
*cls->nextFreeByte = stride; cls->nextFreeByte++;
|
||||
*cls->nextFreeByte = vertexVPMOffset; cls->nextFreeByte++;
|
||||
cls->nextFreeByte++; //skip coordinate shader stuff
|
||||
}
|
||||
|
||||
/*
 * Returns the index of `handle` inside the handles control list.
 * If the handle is not present yet it is appended (4 bytes) and the index
 * of the new entry is returned.
 */
uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handle)
{
    const uint32_t handleCount = clSize(handlesCl) / 4;
    const uint32_t* knownHandles = (const uint32_t*)handlesCl->buffer;

    uint32_t idx = 0;
    while(idx < handleCount && knownHandles[idx] != handle)
    {
        ++idx;
    }

    if(idx == handleCount)
    {
        //not found: write handle to the end of the handles cl
        *(uint32_t*)handlesCl->nextFreeByte = handle;
        handlesCl->nextFreeByte += 4;
    }

    return idx;
}
|
||||
|
||||
//input: 2 cls (cl + handles cl)
|
||||
/*
 * Writes the handle index of `address` into the control list.
 * `cls` must point at two consecutive control lists: cls[0] receives the
 * 4-byte index, cls[1] is the handles list that is searched (and extended
 * when the handle is new).
 */
static inline void clEmitShaderRelocation(ControlList* cls, const ControlListAddress* address)
{
    assert(cls);
    assert(cls->buffer);
    assert(cls->nextFreeByte);
    assert(address);
    assert(address->handle);

    //look the handle up in (or add it to) the handles cl that follows cls
    uint32_t handleIndex = clGetHandleIndex(cls + 1, address->handle);

    //store the index within the handles list in the cl
    *(uint32_t*)cls->nextFreeByte = handleIndex;
    cls->nextFreeByte += 4;
}
|
||||
uint32_t vertexVPMOffset);
|
||||
uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handle);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
48
driver/LinearAllocator.c
Normal file
48
driver/LinearAllocator.c
Normal file
@ -0,0 +1,48 @@
|
||||
#include "LinearAllocator.h"
|
||||
|
||||
#include "CustomAssert.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
LinearAllocator createLinearAllocator(char* b, unsigned s)
|
||||
{
|
||||
assert(b);
|
||||
assert(s > 0);
|
||||
|
||||
LinearAllocator la =
|
||||
{
|
||||
.buf = b,
|
||||
.offset = 0,
|
||||
.size = s
|
||||
};
|
||||
|
||||
return la;
|
||||
}
|
||||
|
||||
/*
 * Resets the allocator's bookkeeping fields to zero.
 * No memory is released here.
 */
void destroyLinearAllocator(LinearAllocator* la)
{
    la->size = 0;
    la->offset = 0;
    la->buf = 0;
}
|
||||
|
||||
/*
 * Hands out the next `s` bytes of the allocator's buffer.
 *
 * Returns a pointer to the START of the reserved region, or 0 when fewer
 * than `s` bytes remain. The previous implementation returned the END of
 * the region (buf + offset + s), so callers wrote past their allocation.
 */
void* linearAllocte(LinearAllocator* la, unsigned s)
{
    assert(la->buf);
    assert(la->size > 0);

    //compare against the remaining space so that offset + s cannot wrap;
    //an allocation that exactly fills the buffer is allowed
    if(la->offset > la->size || s > la->size - la->offset)
    {
        return 0; //no space left
    }

    char* p = la->buf + la->offset;
    la->offset += s;

    return p;
}
|
||||
|
||||
/*
 * Intentionally a no-op: individual allocations are never returned,
 * the whole allocator is destroyed/reset instead.
 */
void linearFree(LinearAllocator* la, void* p)
{
    (void)la;
    (void)p;
    //assert(0); //this shouldn't really happen, just destroy/reset the whole allocator
}
|
@ -15,48 +15,10 @@ typedef struct LinearAllocator
|
||||
unsigned size;
|
||||
} LinearAllocator;
|
||||
|
||||
LinearAllocator createLinearAllocator(char* b, unsigned s)
|
||||
{
|
||||
assert(b);
|
||||
assert(s > 0);
|
||||
|
||||
LinearAllocator la =
|
||||
{
|
||||
.buf = b,
|
||||
.offset = 0,
|
||||
.size = s
|
||||
};
|
||||
|
||||
return la;
|
||||
}
|
||||
|
||||
/*
 * Resets the allocator's bookkeeping fields to zero.
 * No memory is released here.
 */
void destroyLinearAllocator(LinearAllocator* la)
{
    la->size = 0;
    la->offset = 0;
    la->buf = 0;
}
|
||||
|
||||
/*
 * Hands out the next `s` bytes of the allocator's buffer.
 *
 * Returns a pointer to the START of the reserved region, or 0 when fewer
 * than `s` bytes remain. The previous implementation returned the END of
 * the region (buf + offset + s), so callers wrote past their allocation.
 */
void* linearAllocte(LinearAllocator* la, unsigned s)
{
    assert(la->buf);
    assert(la->size > 0);

    //compare against the remaining space so that offset + s cannot wrap;
    //an allocation that exactly fills the buffer is allowed
    if(la->offset > la->size || s > la->size - la->offset)
    {
        return 0; //no space left
    }

    char* p = la->buf + la->offset;
    la->offset += s;

    return p;
}
|
||||
|
||||
/*
 * Intentionally a no-op: individual allocations are never returned,
 * the whole allocator is destroyed/reset instead.
 */
void linearFree(LinearAllocator* la, void* p)
{
    (void)la;
    (void)p;
    //assert(0); //this shouldn't really happen, just destroy/reset the whole allocator
}
|
||||
LinearAllocator createLinearAllocator(char* b, unsigned s);
|
||||
void destroyLinearAllocator(LinearAllocator* la);
|
||||
void* linearAllocte(LinearAllocator* la, unsigned s);
|
||||
void linearFree(LinearAllocator* la, void* p);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
72
driver/PoolAllocator.c
Normal file
72
driver/PoolAllocator.c
Normal file
@ -0,0 +1,72 @@
|
||||
#include "PoolAllocator.h"
|
||||
|
||||
#include "CustomAssert.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
PoolAllocator createPoolAllocator(char* b, unsigned bs, unsigned s)
|
||||
{
|
||||
assert(b); //only allocated memory
|
||||
assert(bs >= sizeof(void*)); //we need to be able to store
|
||||
assert(s%bs==0); //we want a size that is the exact multiple of block size
|
||||
assert(s > bs); //at least 1 element
|
||||
|
||||
PoolAllocator pa =
|
||||
{
|
||||
.buf = b,
|
||||
.nextFreeBlock = (uint32_t*)b,
|
||||
.blockSize = bs,
|
||||
.size = s
|
||||
};
|
||||
|
||||
//initialize linked list of free pointers
|
||||
uint32_t* ptr = pa.nextFreeBlock;
|
||||
for(unsigned c = 0; c < s/bs - 1; ++c)
|
||||
{
|
||||
*ptr = (uint32_t)ptr + bs;
|
||||
ptr += bs;
|
||||
}
|
||||
|
||||
*ptr = 0; //last element
|
||||
|
||||
return pa;
|
||||
}
|
||||
|
||||
/*
 * Clears the pool's bookkeeping fields.
 * The backing buffer is NOT freed here; that is the caller's job.
 */
void destroyPoolAllocator(PoolAllocator* pa)
{
    pa->size = 0;
    pa->blockSize = 0;
    pa->nextFreeBlock = 0;
    pa->buf = 0;
}
|
||||
|
||||
/*
 * Pops the head of the free list and returns it.
 * Returns 0 when the pool has no free blocks.
 */
void* poolAllocate(PoolAllocator* pa)
{
    assert(pa->buf);

    uint32_t* head = pa->nextFreeBlock;

    if(head)
    {
        //the first word of a free block holds the link to the next one
        pa->nextFreeBlock = (uint32_t*)*head;
    }

    return head;
}
|
||||
|
||||
/*
 * Pushes block `p` back onto the head of the pool's free list.
 */
void poolFree(PoolAllocator* pa, void* p)
{
    assert(pa->buf);
    assert(p);

    uint32_t* freed = p;

    //link the freed block to the current head...
    *freed = (uint32_t)pa->nextFreeBlock;

    //...and make it the new head
    pa->nextFreeBlock = freed;
}
|
@ -16,72 +16,10 @@ typedef struct PoolAllocator
|
||||
unsigned size; //size is exact multiple of block size
|
||||
} PoolAllocator;
|
||||
|
||||
PoolAllocator createPoolAllocator(char* b, unsigned bs, unsigned s)
|
||||
{
|
||||
assert(b); //only allocated memory
|
||||
assert(bs >= sizeof(void*)); //we need to be able to store
|
||||
assert(s%bs==0); //we want a size that is the exact multiple of block size
|
||||
assert(s > bs); //at least 1 element
|
||||
|
||||
PoolAllocator pa =
|
||||
{
|
||||
.buf = b,
|
||||
.nextFreeBlock = (uint32_t*)b,
|
||||
.blockSize = bs,
|
||||
.size = s
|
||||
};
|
||||
|
||||
//initialize linked list of free pointers
|
||||
uint32_t* ptr = pa.nextFreeBlock;
|
||||
for(unsigned c = 0; c < s/bs - 1; ++c)
|
||||
{
|
||||
*ptr = (uint32_t)ptr + bs;
|
||||
ptr += bs;
|
||||
}
|
||||
|
||||
*ptr = 0; //last element
|
||||
|
||||
return pa;
|
||||
}
|
||||
|
||||
/*
 * Clears the pool's bookkeeping fields.
 * The backing buffer is NOT freed here; that is the caller's job.
 */
void destroyPoolAllocator(PoolAllocator* pa)
{
    pa->size = 0;
    pa->blockSize = 0;
    pa->nextFreeBlock = 0;
    pa->buf = 0;
}
|
||||
|
||||
/*
 * Pops the head of the free list and returns it.
 * Returns 0 when the pool has no free blocks.
 */
void* poolAllocate(PoolAllocator* pa)
{
    assert(pa->buf);

    uint32_t* head = pa->nextFreeBlock;

    if(head)
    {
        //the first word of a free block holds the link to the next one
        pa->nextFreeBlock = (uint32_t*)*head;
    }

    return head;
}
|
||||
|
||||
/*
 * Pushes block `p` back onto the head of the pool's free list.
 */
void poolFree(PoolAllocator* pa, void* p)
{
    assert(pa->buf);
    assert(p);

    uint32_t* freed = p;

    //link the freed block to the current head...
    *freed = (uint32_t)pa->nextFreeBlock;

    //...and make it the new head
    pa->nextFreeBlock = freed;
}
|
||||
PoolAllocator createPoolAllocator(char* b, unsigned bs, unsigned s);
|
||||
void destroyPoolAllocator(PoolAllocator* pa);
|
||||
void* poolAllocate(PoolAllocator* pa);
|
||||
void poolFree(PoolAllocator* pa, void* p);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
449
driver/command.c
Normal file
449
driver/command.c
Normal file
@ -0,0 +1,449 @@
|
||||
#include "common.h"
|
||||
|
||||
#include "kernel/vc4_packet.h"
|
||||
#include "../brcm/cle/v3d_decoder.h"
|
||||
#include "../brcm/clif/clif_dump.h"
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#commandbuffers-pools
|
||||
* Command pools are opaque objects that command buffer memory is allocated from, and which allow the implementation to amortize the
|
||||
* cost of resource creation across multiple command buffers. Command pools are externally synchronized, meaning that a command pool must
|
||||
* not be used concurrently in multiple threads. That includes use via recording commands on any command buffers allocated from the pool,
|
||||
* as well as operations that allocate, free, and reset command buffers or the pool itself.
|
||||
*/
|
||||
/*
 * Creates a command pool backed by two host allocations:
 *  - a PoolAllocator holding the _commandBuffer objects
 *  - a ConsecutivePoolAllocator holding control list memory, in
 *    ARM_PAGE_SIZE blocks
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY if any host allocation fails; in
 * that case everything allocated so far is released again (the previous
 * code leaked the pool object and the first backing buffer).
 */
VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool(
        VkDevice device,
        const VkCommandPoolCreateInfo* pCreateInfo,
        const VkAllocationCallbacks* pAllocator,
        VkCommandPool* pCommandPool)
{
    assert(device);
    assert(pCreateInfo);

    //TODO: allocator is ignored for now
    assert(pAllocator == 0);

    //VK_COMMAND_POOL_CREATE_TRANSIENT_BIT
    //specifies that command buffers allocated from the pool will be short-lived, meaning that they will be reset or freed in a relatively short timeframe.
    //This flag may be used by the implementation to control memory allocation behavior within the pool.
    //--> definitely use pool allocator

    //VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT
    //allows any command buffer allocated from a pool to be individually reset to the initial state; either by calling vkResetCommandBuffer, or via the implicit reset when calling vkBeginCommandBuffer.
    //If this flag is not set on a pool, then vkResetCommandBuffer must not be called for any command buffer allocated from that pool.

    //TODO pool family ignored for now

    _commandPool* cp = malloc(sizeof(_commandPool));

    if(!cp)
    {
        return VK_ERROR_OUT_OF_HOST_MEMORY;
    }

    cp->queueFamilyIndex = pCreateInfo->queueFamilyIndex;

    //initial number of command buffers to hold
    int numCommandBufs = 100;
    int controlListSize = ARM_PAGE_SIZE * 100;

    //if(pCreateInfo->flags & VK_COMMAND_POOL_CREATE_TRANSIENT_BIT)
    {
        //use pool allocator
        void* pamem = malloc(numCommandBufs * sizeof(_commandBuffer));
        if(!pamem)
        {
            free(cp);
            return VK_ERROR_OUT_OF_HOST_MEMORY;
        }

        void* cpamem = malloc(controlListSize);
        if(!cpamem)
        {
            free(pamem);
            free(cp);
            return VK_ERROR_OUT_OF_HOST_MEMORY;
        }

        //both buffers exist, now hand them to the allocators
        cp->pa = createPoolAllocator(pamem, sizeof(_commandBuffer), numCommandBufs * sizeof(_commandBuffer));
        cp->cpa = createConsecutivePoolAllocator(cpamem, ARM_PAGE_SIZE, controlListSize);
    }

    *pCommandPool = (VkCommandPool)cp;

    return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#commandbuffer-allocation
|
||||
* vkAllocateCommandBuffers can be used to create multiple command buffers. If the creation of any of those command buffers fails,
|
||||
* the implementation must destroy all successfully created command buffer objects from this command, set all entries of the pCommandBuffers array to NULL and return the error.
|
||||
*/
|
||||
/*
 * Allocates pAllocateInfo->commandBufferCount command buffers from the
 * pool and gives each of them four one-block control lists
 * (bin, handles, shader records, uniforms).
 *
 * On failure every command buffer created by this call is released,
 * ALL entries of pCommandBuffers are set to NULL and
 * VK_ERROR_OUT_OF_HOST_MEMORY is returned.
 *
 * Fix: the old cleanup loop walked the whole output array, dereferencing
 * the NULL entry that triggered the failure as well as entries that were
 * never written.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers(
        VkDevice device,
        const VkCommandBufferAllocateInfo* pAllocateInfo,
        VkCommandBuffer* pCommandBuffers)
{
    assert(device);
    assert(pAllocateInfo);
    assert(pCommandBuffers);

    VkResult res = VK_SUCCESS;

    _commandPool* cp = (_commandPool*)pAllocateInfo->commandPool;

    //entries [0, numAllocated) hold command buffers obtained from cp->pa
    uint32_t numAllocated = 0;

    //if(cp->usePoolAllocator)
    {
        for(uint32_t c = 0; c < pAllocateInfo->commandBufferCount; ++c)
        {
            pCommandBuffers[c] = poolAllocate(&cp->pa);

            if(!pCommandBuffers[c])
            {
                res = VK_ERROR_OUT_OF_HOST_MEMORY;
                break;
            }

            numAllocated = c + 1;

            pCommandBuffers[c]->shaderRecCount = 0;
            pCommandBuffers[c]->usageFlags = 0;
            pCommandBuffers[c]->state = CMDBUF_STATE_INITIAL;
            pCommandBuffers[c]->cp = cp;
            clInit(&pCommandBuffers[c]->binCl, consecutivePoolAllocate(&cp->cpa, 1));
            clInit(&pCommandBuffers[c]->handlesCl, consecutivePoolAllocate(&cp->cpa, 1));
            clInit(&pCommandBuffers[c]->shaderRecCl, consecutivePoolAllocate(&cp->cpa, 1));
            clInit(&pCommandBuffers[c]->uniformsCl, consecutivePoolAllocate(&cp->cpa, 1));

            if(!pCommandBuffers[c]->binCl.buffer ||
               !pCommandBuffers[c]->handlesCl.buffer ||
               !pCommandBuffers[c]->shaderRecCl.buffer ||
               !pCommandBuffers[c]->uniformsCl.buffer)
            {
                res = VK_ERROR_OUT_OF_HOST_MEMORY;
                break;
            }
        }
    }

    if(res != VK_SUCCESS)
    {
        //if(cp->usePoolAllocator)
        {
            //release only what this call actually obtained
            //NOTE(review): the free calls are kept exactly as before (they pass
            //the ControlList address and binCl.numBlocks for every list) -
            //confirm against consecutivePoolFree's contract
            for(uint32_t c = 0; c < numAllocated; ++c)
            {
                consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->binCl, pCommandBuffers[c]->binCl.numBlocks);
                consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->handlesCl, pCommandBuffers[c]->binCl.numBlocks);
                consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->shaderRecCl, pCommandBuffers[c]->binCl.numBlocks);
                consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->uniformsCl, pCommandBuffers[c]->binCl.numBlocks);
                poolFree(&cp->pa, pCommandBuffers[c]);
            }

            //the whole output array must read as NULL after a failure
            for(uint32_t c = 0; c < pAllocateInfo->commandBufferCount; ++c)
            {
                pCommandBuffers[c] = 0;
            }
        }
    }

    return res;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkBeginCommandBuffer
|
||||
*/
|
||||
/*
 * Puts the command buffer into the recording state.
 * The usage flags are stored, the shader record count is reset and the
 * submit descriptor is re-initialised with every surface handle index
 * set to ~0 (all other fields zero).
 */
VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer(
        VkCommandBuffer commandBuffer,
        const VkCommandBufferBeginInfo* pBeginInfo)
{
    assert(commandBuffer);
    assert(pBeginInfo);

    //TODO

    //VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
    //specifies that each recording of the command buffer will only be submitted once, and the command buffer will be reset and recorded again between each submission.

    //VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT
    //specifies that a secondary command buffer is considered to be entirely inside a render pass. If this is a primary command buffer, then this bit is ignored

    //VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT
    //specifies that a command buffer can be resubmitted to a queue while it is in the pending state, and recorded into multiple primary command buffers

    //When a command buffer begins recording, all state in that command buffer is undefined

    commandBuffer->submitCl = (struct drm_vc4_submit_cl)
    {
        .color_read.hindex = ~0,
        .zs_read.hindex = ~0,
        .color_write.hindex = ~0,
        .msaa_color_write.hindex = ~0,
        .zs_write.hindex = ~0,
        .msaa_zs_write.hindex = ~0,
    };

    commandBuffer->state = CMDBUF_STATE_RECORDING;
    commandBuffer->shaderRecCount = 0;
    commandBuffer->usageFlags = pBeginInfo->flags;

    return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEndCommandBuffer
|
||||
* If there was an error during recording, the application will be notified by an unsuccessful return code returned by vkEndCommandBuffer.
|
||||
* If the application wishes to further use the command buffer, the command buffer must be reset. The command buffer must have been in the recording state,
|
||||
* and is moved to the executable state.
|
||||
*/
|
||||
/*
 * Finishes recording: appends an INCREMENT_SEMAPHORE and a FLUSH packet
 * to the binning control list and moves the command buffer to the
 * executable state.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkEndCommandBuffer(
        VkCommandBuffer commandBuffer)
{
    assert(commandBuffer);

    ControlList* binList = &commandBuffer->binCl;

    //Increment the semaphore indicating that binning is done and
    //unblocking the render thread. Note that this doesn't act
    //until the FLUSH completes.
    clFit(commandBuffer, binList, V3D21_INCREMENT_SEMAPHORE_length);
    clInsertIncrementSemaphore(binList);

    //The FLUSH caps all of our bin lists with a
    //VC4_PACKET_RETURN.
    clFit(commandBuffer, binList, V3D21_FLUSH_length);
    clInsertFlush(binList);

    commandBuffer->state = CMDBUF_STATE_EXECUTABLE;

    return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkQueueSubmit
|
||||
* vkQueueSubmit is a queue submission command, with each batch defined by an element of pSubmits as an instance of the VkSubmitInfo structure.
|
||||
* Batches begin execution in the order they appear in pSubmits, but may complete out of order.
|
||||
* Fence and semaphore operations submitted with vkQueueSubmit have additional ordering constraints compared to other submission commands,
|
||||
* with dependencies involving previous and subsequent queue operations. Information about these additional constraints can be found in the semaphore and
|
||||
* fence sections of the synchronization chapter.
|
||||
* Details on the interaction of pWaitDstStageMask with synchronization are described in the semaphore wait operation section of the synchronization chapter.
|
||||
* The order that batches appear in pSubmits is used to determine submission order, and thus all the implicit ordering guarantees that respect it.
|
||||
* Other than these implicit ordering guarantees and any explicit synchronization primitives, these batches may overlap or otherwise execute out of order.
|
||||
* If any command buffer submitted to this queue is in the executable state, it is moved to the pending state. Once execution of all submissions of a command buffer complete,
|
||||
* it moves from the pending state, back to the executable state. If a command buffer was recorded with the VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT flag,
|
||||
* it instead moves back to the invalid state.
|
||||
* If vkQueueSubmit fails, it may return VK_ERROR_OUT_OF_HOST_MEMORY or VK_ERROR_OUT_OF_DEVICE_MEMORY.
|
||||
* If it does, the implementation must ensure that the state and contents of any resources or synchronization primitives referenced by the submitted command buffers and any semaphores
|
||||
* referenced by pSubmits is unaffected by the call or its failure. If vkQueueSubmit fails in such a way that the implementation is unable to make that guarantee,
|
||||
* the implementation must return VK_ERROR_DEVICE_LOST. See Lost Device.
|
||||
*/
|
||||
/*
 * Submits the command buffers of the batch pointed to by pSubmits:
 *  1. waits on the batch's wait semaphores (sem_wait)
 *  2. moves executable command buffers to the pending state
 *  3. fills in each command buffer's submit descriptor from its control
 *     lists, dumps it to stdout and submits it via vc4_cl_submit
 *  4. moves pending command buffers to invalid (one-time-submit) or back
 *     to executable
 *  5. signals the batch's signal semaphores (sem_post)
 *
 * NOTE(review): submitCount and fence are not used; only the batch at
 * pSubmits[0] is processed - confirm whether that is intended.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
        VkQueue queue,
        uint32_t submitCount,
        const VkSubmitInfo* pSubmits,
        VkFence fence)
{
    //kept across calls, filled in by vc4_cl_submit
    static uint64_t lastFinishedSeqno = 0;

    assert(queue);

    for(int c = 0; c < pSubmits->waitSemaphoreCount; ++c)
    {
        sem_wait((sem_t*)pSubmits->pWaitSemaphores[c]);
    }

    //TODO: deal with pSubmits->pWaitDstStageMask

    //TODO wait for fence??

    for(int c = 0; c < pSubmits->commandBufferCount; ++c)
    {
        VkCommandBuffer cmdbuf = pSubmits->pCommandBuffers[c];

        if(cmdbuf->state == CMDBUF_STATE_EXECUTABLE)
        {
            cmdbuf->state = CMDBUF_STATE_PENDING;
        }
    }

    for(int c = 0; c < pSubmits->commandBufferCount; ++c)
    {
        VkCommandBuffer cmdbuf = pSubmits->pCommandBuffers[c];
        struct drm_vc4_submit_cl* scl = &cmdbuf->submitCl;

        //point the submit descriptor at the recorded control lists
        scl->bo_handles = cmdbuf->handlesCl.buffer;
        scl->bo_handle_count = clSize(&cmdbuf->handlesCl) / 4;
        scl->bin_cl = cmdbuf->binCl.buffer;
        scl->bin_cl_size = clSize(&cmdbuf->binCl);
        scl->shader_rec = cmdbuf->shaderRecCl.buffer;
        scl->shader_rec_size = clSize(&cmdbuf->shaderRecCl);
        scl->shader_rec_count = cmdbuf->shaderRecCount;
        scl->uniforms = cmdbuf->uniformsCl.buffer;
        scl->uniforms_size = clSize(&cmdbuf->uniformsCl);

        //debug dump of everything that is about to be submitted
        printf("BCL:\n");
        clDump(scl->bin_cl, scl->bin_cl_size);
        printf("BO handles: ");
        for(int d = 0; d < scl->bo_handle_count; ++d)
        {
            printf("%u ", *((uint32_t*)(scl->bo_handles)+d));
        }
        printf("\nwidth height: %u, %u\n", scl->width, scl->height);
        printf("tile min/max: %u,%u %u,%u\n", scl->min_x_tile, scl->min_y_tile, scl->max_x_tile, scl->max_y_tile);
        printf("color read surf: hindex, offset, bits, flags %u %u %u %u\n", scl->color_read.hindex, scl->color_read.offset, scl->color_read.bits, scl->color_read.flags);
        printf("color write surf: hindex, offset, bits, flags %u %u %u %u\n", scl->color_write.hindex, scl->color_write.offset, scl->color_write.bits, scl->color_write.flags);
        printf("zs read surf: hindex, offset, bits, flags %u %u %u %u\n", scl->zs_read.hindex, scl->zs_read.offset, scl->zs_read.bits, scl->zs_read.flags);
        printf("zs write surf: hindex, offset, bits, flags %u %u %u %u\n", scl->zs_write.hindex, scl->zs_write.offset, scl->zs_write.bits, scl->zs_write.flags);
        printf("msaa color write surf: hindex, offset, bits, flags %u %u %u %u\n", scl->msaa_color_write.hindex, scl->msaa_color_write.offset, scl->msaa_color_write.bits, scl->msaa_color_write.flags);
        printf("msaa zs write surf: hindex, offset, bits, flags %u %u %u %u\n", scl->msaa_zs_write.hindex, scl->msaa_zs_write.offset, scl->msaa_zs_write.bits, scl->msaa_zs_write.flags);
        printf("clear color packed rgba %u %u\n", scl->clear_color[0], scl->clear_color[1]);
        printf("clear z %u\n", scl->clear_z);
        printf("clear s %u\n", scl->clear_s);
        printf("flags %u\n", scl->flags);

        //submit ioctl
        vc4_cl_submit(controlFd, scl, &queue->lastEmitSeqno, &lastFinishedSeqno);
    }

    for(int c = 0; c < pSubmits->commandBufferCount; ++c)
    {
        VkCommandBuffer cmdbuf = pSubmits->pCommandBuffers[c];

        if(cmdbuf->state != CMDBUF_STATE_PENDING)
        {
            continue;
        }

        //one-time-submit buffers become invalid, all others can be resubmitted
        if(cmdbuf->usageFlags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)
        {
            cmdbuf->state = CMDBUF_STATE_INVALID;
        }
        else
        {
            cmdbuf->state = CMDBUF_STATE_EXECUTABLE;
        }
    }

    for(int c = 0; c < pSubmits->signalSemaphoreCount; ++c)
    {
        sem_post((sem_t*)pSubmits->pSignalSemaphores[c]);
    }

    return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkFreeCommandBuffers
|
||||
* Any primary command buffer that is in the recording or executable state and has any element of pCommandBuffers recorded into it, becomes invalid.
|
||||
*/
|
||||
/*
 * Returns the given command buffers, together with their four control
 * lists, to the command pool.
 *
 * Fixes:
 *  - each control list is released with its OWN block count; previously
 *    binCl.numBlocks was used for all four lists (copy-paste)
 *  - NULL elements of pCommandBuffers are skipped instead of dereferenced
 */
VKAPI_ATTR void VKAPI_CALL vkFreeCommandBuffers(
        VkDevice device,
        VkCommandPool commandPool,
        uint32_t commandBufferCount,
        const VkCommandBuffer* pCommandBuffers)
{
    assert(device);
    assert(commandPool);
    assert(pCommandBuffers);

    _commandPool* cp = (_commandPool*)commandPool;

    for(uint32_t c = 0; c < commandBufferCount; ++c)
    {
        VkCommandBuffer cmdbuf = pCommandBuffers[c];

        if(!cmdbuf)
        {
            continue; //nothing to free for this entry
        }

        //if(cp->usePoolAllocator)
        {
            //NOTE(review): as before, the ControlList address (not its buffer)
            //is handed to consecutivePoolFree - confirm against its contract
            consecutivePoolFree(&cp->cpa, &cmdbuf->binCl, cmdbuf->binCl.numBlocks);
            consecutivePoolFree(&cp->cpa, &cmdbuf->handlesCl, cmdbuf->handlesCl.numBlocks);
            consecutivePoolFree(&cp->cpa, &cmdbuf->shaderRecCl, cmdbuf->shaderRecCl.numBlocks);
            consecutivePoolFree(&cp->cpa, &cmdbuf->uniformsCl, cmdbuf->uniformsCl.numBlocks);
            poolFree(&cp->pa, cmdbuf);
        }
    }
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyCommandPool
|
||||
* When a pool is destroyed, all command buffers allocated from the pool are freed.
|
||||
* Any primary command buffer allocated from another VkCommandPool that is in the recording or executable state and has a secondary command buffer
|
||||
* allocated from commandPool recorded into it, becomes invalid.
|
||||
*/
|
||||
/*
 * Destroys a command pool: frees the backing memory of both allocators,
 * resets the allocators and finally frees the pool object itself.
 */
VKAPI_ATTR void VKAPI_CALL vkDestroyCommandPool(
        VkDevice device,
        VkCommandPool commandPool,
        const VkAllocationCallbacks* pAllocator)
{
    assert(device);
    assert(commandPool);

    //TODO: allocator is ignored for now
    assert(pAllocator == 0);

    _commandPool* pool = (_commandPool*)commandPool;

    //if(cp->usePoolAllocator)
    {
        //command buffer object storage
        free(pool->pa.buf);
        destroyPoolAllocator(&pool->pa);

        //control list storage
        free(pool->cpa.buf);
        destroyConsecutivePoolAllocator(&pool->cpa);
    }

    free(pool);
}
|
||||
|
||||
/*
 * Makes sure `cl` can take another `commandSize` bytes.
 * When it cannot, the list's buffer is re-allocated from the command
 * pool's consecutive allocator and the write cursor is re-based onto the
 * new buffer at the same offset.
 */
void clFit(VkCommandBuffer cb, ControlList* cl, uint32_t commandSize)
{
    if(clHasEnoughSpace(cl, commandSize))
    {
        return; //already fits
    }

    //remember how much is in use before the buffer may move
    uint32_t usedBytes = clSize(cl);

    cl->buffer = consecutivePoolReAllocate(&cb->cp->cpa, cl->buffer, cl->numBlocks);
    assert(cl->buffer);

    cl->nextFreeByte = cl->buffer + usedBytes;
}
|
||||
|
||||
void clDump(void* cl, uint32_t size)
|
||||
{
|
||||
struct v3d_device_info devinfo = {
|
||||
/* While the driver supports V3D 2.1 and 2.6, we haven't split
|
||||
* off a 2.6 XML yet (there are a couple of fields different
|
||||
* in render target formatting)
|
||||
*/
|
||||
.ver = 21,
|
||||
};
|
||||
struct v3d_spec* spec = v3d_spec_load(&devinfo);
|
||||
|
||||
struct clif_dump *clif = clif_dump_init(&devinfo, stderr, true);
|
||||
|
||||
uint32_t offset = 0, hw_offset = 0;
|
||||
uint8_t *p = cl;
|
||||
|
||||
while (offset < size) {
|
||||
struct v3d_group *inst = v3d_spec_find_instruction(spec, p);
|
||||
uint8_t header = *p;
|
||||
uint32_t length;
|
||||
|
||||
if (inst == NULL) {
|
||||
printf("0x%08x 0x%08x: Unknown packet 0x%02x (%d)!\n",
|
||||
offset, hw_offset, header, header);
|
||||
return;
|
||||
}
|
||||
|
||||
length = v3d_group_get_length(inst);
|
||||
|
||||
printf("0x%08x 0x%08x: 0x%02x %s\n",
|
||||
offset, hw_offset, header, v3d_group_get_name(inst));
|
||||
|
||||
v3d_print_group(clif, inst, offset, p);
|
||||
|
||||
switch (header) {
|
||||
case VC4_PACKET_HALT:
|
||||
case VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF:
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
offset += length;
|
||||
if (header != VC4_PACKET_GEM_HANDLES)
|
||||
hw_offset += length;
|
||||
p += length;
|
||||
}
|
||||
|
||||
clif_dump_destroy(clif);
|
||||
}
|
328
driver/common.c
Normal file
328
driver/common.c
Normal file
@ -0,0 +1,328 @@
|
||||
#include "common.h"
|
||||
|
||||
#include "kernel/vc4_packet.h"
|
||||
|
||||
/*
 * Allocates the buffer object backing image `i` and selects its tiling.
 * Images whose unpadded size exceeds 4096 bytes are padded to T-format
 * tile dimensions and marked T-tiled; smaller ones keep their dimensions,
 * get the linear modifier and VC4_TILING_FORMAT_LT.
 * Fills in paddedWidth, paddedHeight, size, stride, handle and tiling.
 */
void createImageBO(_image* i)
{
    assert(i);
    assert(i->format);
    assert(i->width);
    assert(i->height);

    uint32_t bpp = getFormatBpp(i->format);
    uint32_t bytesPerPixel = bpp / 8;

    //need to pad to T format if size > 4KB, as HW automatically chooses that
    int useTFormat = (i->width * i->height * bytesPerPixel) > 4096;

    i->paddedWidth = i->width;
    i->paddedHeight = i->height;

    if(useTFormat)
    {
        getPaddedTextureDimensionsT(i->width, i->height, bpp, &i->paddedWidth, &i->paddedHeight);
    }

    i->size = i->paddedWidth * i->paddedHeight * bytesPerPixel;
    i->stride = i->paddedWidth * bytesPerPixel;

    i->handle = vc4_bo_alloc(controlFd, i->size, "swapchain image");
    assert(i->handle);

    if(useTFormat)
    {
        int ret = vc4_bo_set_tiling(controlFd, i->handle, DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED);
        assert(ret);
        i->tiling = VC4_TILING_FORMAT_T;
    }
    else
    {
        int ret = vc4_bo_set_tiling(controlFd, i->handle, DRM_FORMAT_MOD_LINEAR);
        assert(ret);
        i->tiling = VC4_TILING_FORMAT_LT;
    }
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdClearColorImage
|
||||
* Color and depth/stencil images can be cleared outside a render pass instance using vkCmdClearColorImage or vkCmdClearDepthStencilImage, respectively.
|
||||
* These commands are only allowed outside of a render pass instance.
|
||||
*/
|
||||
/*
 * Flags `image` as needing a clear and stores the packed clear colour.
 * Nothing is cleared here; only needToClear and clearColor are written.
 * imageLayout, the command buffer state, the queue family flags and the
 * image usage bits are only checked through asserts.
 */
VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage(
        VkCommandBuffer commandBuffer,
        VkImage image,
        VkImageLayout imageLayout,
        const VkClearColorValue* pColor,
        uint32_t rangeCount,
        const VkImageSubresourceRange* pRanges)
{
    assert(commandBuffer);
    assert(image);
    assert(pColor);

    //TODO this should only flag an image for clearing. This can only be called outside a renderpass
    //actual clearing would only happen:
    // -if image is rendered to (insert clear before first draw call)
    // -if the image is bound for sampling (submit a CL with a clear)
    // -if a command buffer is submitted without any rendering (insert clear)
    // -etc.
    //we shouldn't clear an image if no one uses it

    //TODO ranges support

    assert(imageLayout == VK_IMAGE_LAYOUT_GENERAL ||
           imageLayout == VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR ||
           imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

    assert(commandBuffer->state == CMDBUF_STATE_RECORDING);

    //the pool's queue family must support graphics or compute
    assert(_queueFamilyProperties[commandBuffer->cp->queueFamilyIndex].queueFlags & VK_QUEUE_GRAPHICS_BIT || _queueFamilyProperties[commandBuffer->cp->queueFamilyIndex].queueFlags & VK_QUEUE_COMPUTE_BIT);

    _image* i = image;

    assert(i->usageBits & VK_IMAGE_USAGE_TRANSFER_DST_BIT);

    //TODO externally sync cmdbuf, cmdpool

    //both clear colour slots receive the same packed value
    i->clearColor[1] = packVec4IntoABGR8(pColor->float32);
    i->clearColor[0] = i->clearColor[1];
    i->needToClear = 1;
}
|
||||
|
||||
int findInstanceExtension(char* name)
|
||||
{
|
||||
for(int c = 0; c < numInstanceExtensions; ++c)
|
||||
{
|
||||
if(strcmp(instanceExtensions[c].extensionName, name) == 0)
|
||||
{
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int findDeviceExtension(char* name)
|
||||
{
|
||||
for(int c = 0; c < numDeviceExtensions; ++c)
|
||||
{
|
||||
if(strcmp(deviceExtensions[c].extensionName, name) == 0)
|
||||
{
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
//Textures in T format:
|
||||
//formed out of 4KB tiles, which have 1KB subtiles (see page 105 in VC4 arch guide)
|
||||
//1KB subtiles have 512b microtiles.
|
||||
//Width/height of the 512b microtiles is the following:
|
||||
// 64bpp: 2x4
|
||||
// 32bpp: 4x4
|
||||
// 16bpp: 8x4
|
||||
// 8bpp: 8x8
|
||||
// 4bpp: 16x8
|
||||
// 1bpp: 32x16
|
||||
//Therefore width/height of 1KB subtiles is the following:
|
||||
// 64bpp: 8x16
|
||||
// 32bpp: 16x16
|
||||
// 16bpp: 32x16
|
||||
// 8bpp: 32x32
|
||||
// 4bpp: 64x32
|
||||
// 1bpp: 128x64
|
||||
//Finally width/height of the 4KB tiles:
|
||||
// 64bpp: 16x32
|
||||
// 32bpp: 32x32
|
||||
// 16bpp: 64x32
|
||||
// 8bpp: 64x64
|
||||
// 4bpp: 128x64
|
||||
// 1bpp: 256x128
|
||||
//Rounds width/height up to the next multiple of the 4KB T-format tile size for the given bits per pixel.
// width, height: unpadded image dimensions in pixels
// bpp: bits per pixel, one of 64, 32, 16, 8, 4, 1
// paddedWidth, paddedHeight: receive the padded dimensions (must not be null)
//For an unsupported bpp the function asserts; if asserts are compiled out the
//dimensions are returned unpadded instead of dividing by a zero tile size.
void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp, uint32_t* paddedWidth, uint32_t* paddedHeight)
{
	assert(paddedWidth);
	assert(paddedHeight);
	uint32_t tileW = 0;
	uint32_t tileH = 0;

	switch(bpp)
	{
	case 64:
	{
		tileW = 16;
		tileH = 32;
		break;
	}
	case 32:
	{
		tileW = 32;
		tileH = 32;
		break;
	}
	case 16:
	{
		tileW = 64;
		tileH = 32;
		break;
	}
	case 8:
	{
		tileW = 64;
		tileH = 64;
		break;
	}
	case 4:
	{
		tileW = 128;
		tileH = 64;
		break;
	}
	case 1:
	{
		tileW = 256;
		tileH = 128;
		break;
	}
	default:
	{
		assert(0); //unsupported
		//tileW/tileH are still 0 here: bail out before the modulo below
		*paddedWidth = width;
		*paddedHeight = height;
		return;
	}
	}

	//add whatever is missing to reach the next tile boundary (0 if already aligned)
	*paddedWidth = ((tileW - (width % tileW)) % tileW) + width;
	*paddedHeight = ((tileH - (height % tileH)) % tileH) + height;
}
|
||||
|
||||
//Returns the number of bits one pixel of format f occupies in memory.
//VK_FORMAT_R8G8B8_UNORM is reported as 32 because it is stored padded.
//Unsupported formats assert; with asserts compiled out 0 is returned.
uint32_t getFormatBpp(VkFormat f)
{
	switch(f)
	{
	case VK_FORMAT_R16G16B16A16_SFLOAT:
		return 64;
	case VK_FORMAT_R8G8B8_UNORM: //padded to 32
	case VK_FORMAT_R8G8B8A8_UNORM:
		return 32;
	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
	case VK_FORMAT_R5G6B5_UNORM_PACK16:
	case VK_FORMAT_R8G8_UNORM:
	case VK_FORMAT_R16_SFLOAT:
	case VK_FORMAT_R16_SINT:
		return 16;
	case VK_FORMAT_R8_UNORM:
	case VK_FORMAT_R8_SINT:
		return 8;
	default:
		assert(0);
		return 0;
	}
}
|
||||
|
||||
//Packs a floating point RGBA colour into one 32 bit word: alpha in bits 31..24,
//blue in 23..16, green in 15..8, red in 7..0.
//Each channel is clamped to [0, 1] (NaN becomes 0) before scaling by 255, because
//converting an out-of-range floating point value to an integer is undefined behaviour.
//In-range values are truncated exactly as before (e.g. 0.5 -> 127).
uint32_t packVec4IntoABGR8(const float rgba[4])
{
	uint32_t channels[4];

	for(int c = 0; c < 4; ++c)
	{
		float v = rgba[c];

		if(!(v > 0.0f)) //negative values and NaN
		{
			v = 0.0f;
		}
		else if(v > 1.0f)
		{
			v = 1.0f;
		}

		channels[c] = (uint32_t)(v * 255.0);
	}

	//shifts are done on uint32_t: shifting a promoted int left by 24 overflows for values >= 128
	return (channels[3] << 24) |
		   (channels[2] << 16) |
		   (channels[1] << 8) |
		   (channels[0] << 0);
}
|
||||
|
||||
/*static inline void util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc)
|
||||
{
|
||||
ubyte r = 0;
|
||||
ubyte g = 0;
|
||||
ubyte b = 0;
|
||||
ubyte a = 0;
|
||||
|
||||
if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) <= 8) {
|
||||
r = float_to_ubyte(rgba[0]);
|
||||
g = float_to_ubyte(rgba[1]);
|
||||
b = float_to_ubyte(rgba[2]);
|
||||
a = float_to_ubyte(rgba[3]);
|
||||
}
|
||||
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_ABGR8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | a;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_XBGR8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | 0xff;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_BGRA8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (a << 24) | (r << 16) | (g << 8) | b;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_BGRX8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (0xffu << 24) | (r << 16) | (g << 8) | b;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_ARGB8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | a;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_XRGB8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | 0xff;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_B5G6R5_UNORM:
|
||||
{
|
||||
uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_B5G5R5X1_UNORM:
|
||||
{
|
||||
uc->us = ((0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_B5G5R5A1_UNORM:
|
||||
{
|
||||
uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_B4G4R4A4_UNORM:
|
||||
{
|
||||
uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_A8_UNORM:
|
||||
{
|
||||
uc->ub = a;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_L8_UNORM:
|
||||
case PIPE_FORMAT_I8_UNORM:
|
||||
{
|
||||
uc->ub = r;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_R32G32B32A32_FLOAT:
|
||||
{
|
||||
uc->f[0] = rgba[0];
|
||||
uc->f[1] = rgba[1];
|
||||
uc->f[2] = rgba[2];
|
||||
uc->f[3] = rgba[3];
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_R32G32B32_FLOAT:
|
||||
{
|
||||
uc->f[0] = rgba[0];
|
||||
uc->f[1] = rgba[1];
|
||||
uc->f[2] = rgba[2];
|
||||
}
|
||||
return;
|
||||
|
||||
default:
|
||||
util_format_write_4f(format, rgba, 0, uc, 0, 0, 0, 1, 1);
|
||||
}
|
||||
}*/
|
120
driver/common.h
Normal file
120
driver/common.h
Normal file
@ -0,0 +1,120 @@
|
||||
#pragma once
|
||||
|
||||
#include <drm/drm.h>
|
||||
#include <drm/drm_fourcc.h>
|
||||
#include <drm/vc4_drm.h>
|
||||
|
||||
#include <vulkan/vulkan.h>
|
||||
#include "vkExt.h"
|
||||
|
||||
#include "AlignedAllocator.h"
|
||||
#include "PoolAllocator.h"
|
||||
#include "ConsecutivePoolAllocator.h"
|
||||
#include "LinearAllocator.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include "CustomAssert.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
|
||||
#include "modeset.h"
|
||||
#include "kernelInterface.h"
|
||||
#include "ControlListUtil.h"
|
||||
|
||||
//min/max helpers. Arguments and the whole expansion are parenthesized so that
//operator precedence at the call site cannot change the meaning (e.g. min(a | b, c)).
//Each argument is still evaluated twice: do not pass expressions with side effects.
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif

#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
|
||||
|
||||
#include "vkCaps.h"
|
||||
|
||||
//Backing type of the VkPhysicalDevice handle.
//Carries no real state yet; the single member only keeps the struct non-empty.
typedef struct VkPhysicalDevice_T
{
	int dummy; //placeholder (hardware id?)
} _physicalDevice;
|
||||
|
||||
//Backing type of the VkQueue handle.
typedef struct VkQueue_T
{
	//presumably the sequence number of the most recent submission on this queue - TODO confirm
	uint64_t lastEmitSeqno;
} _queue;
|
||||
|
||||
typedef struct VkCommandPool_T
|
||||
{
|
||||
PoolAllocator pa;
|
||||
ConsecutivePoolAllocator cpa;
|
||||
uint32_t queueFamilyIndex;
|
||||
} _commandPool;
|
||||
|
||||
//Lifecycle states of a command buffer.
//CMDBUF_STATE_LAST is not a state, it is the number of states.
typedef enum commandBufferState
{
	CMDBUF_STATE_INITIAL    = 0,
	CMDBUF_STATE_RECORDING  = 1,
	CMDBUF_STATE_EXECUTABLE = 2,
	CMDBUF_STATE_PENDING    = 3,
	CMDBUF_STATE_INVALID    = 4,
	CMDBUF_STATE_LAST       = 5
} commandBufferState;
|
||||
|
||||
typedef struct VkCommandBuffer_T
|
||||
{
|
||||
//Recorded commands include commands to bind pipelines and descriptor sets to the command buffer, commands to modify dynamic state, commands to draw (for graphics rendering),
|
||||
//commands to dispatch (for compute), commands to execute secondary command buffers (for primary command buffers only), commands to copy buffers and images, and other commands
|
||||
|
||||
struct drm_vc4_submit_cl submitCl;
|
||||
|
||||
ControlList binCl;
|
||||
ControlList shaderRecCl;
|
||||
uint32_t shaderRecCount;
|
||||
ControlList uniformsCl;
|
||||
ControlList handlesCl;
|
||||
commandBufferState state;
|
||||
VkCommandBufferUsageFlags usageFlags;
|
||||
_commandPool* cp;
|
||||
} _commandBuffer;
|
||||
|
||||
typedef struct VkInstance_T
|
||||
{
|
||||
//supposedly this should contain all the enabled layers?
|
||||
int enabledExtensions[numInstanceExtensions];
|
||||
int numEnabledExtensions;
|
||||
_physicalDevice dev;
|
||||
int chipVersion;
|
||||
int hasTiling;
|
||||
int hasControlFlow;
|
||||
int hasEtc1;
|
||||
int hasThreadedFs;
|
||||
int hasMadvise;
|
||||
} _instance;
|
||||
|
||||
typedef struct VkDevice_T
|
||||
{
|
||||
int enabledExtensions[numDeviceExtensions];
|
||||
int numEnabledExtensions;
|
||||
VkPhysicalDeviceFeatures enabledFeatures;
|
||||
_physicalDevice* dev;
|
||||
_queue* queues[numQueueFamilies];
|
||||
int numQueues[numQueueFamilies];
|
||||
} _device;
|
||||
|
||||
typedef struct VkSwapchain_T
|
||||
{
|
||||
_image* images;
|
||||
uint32_t numImages;
|
||||
uint32_t backbufferIdx;
|
||||
VkSurfaceKHR surface;
|
||||
} _swapchain;
|
||||
|
||||
void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp, uint32_t* paddedWidth, uint32_t* paddedHeight);
|
||||
uint32_t getFormatBpp(VkFormat f);
|
||||
uint32_t packVec4IntoABGR8(const float rgba[4]);
|
||||
int findInstanceExtension(char* name);
|
||||
int findDeviceExtension(char* name);
|
||||
void createImageBO(_image* i);
|
314
driver/device.c
Normal file
314
driver/device.c
Normal file
@ -0,0 +1,314 @@
|
||||
#include "common.h"
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#devsandqueues-physical-device-enumeration
|
||||
* If pPhysicalDevices is NULL, then the number of physical devices available is returned in pPhysicalDeviceCount. Otherwise, pPhysicalDeviceCount must point to a
|
||||
* variable set by the user to the number of elements in the pPhysicalDevices array, and on return the variable is overwritten with the number of handles actually
|
||||
* written to pPhysicalDevices. If pPhysicalDeviceCount is less than the number of physical devices available, at most pPhysicalDeviceCount structures will be written.
|
||||
* If pPhysicalDeviceCount is smaller than the number of physical devices available, VK_INCOMPLETE will be returned instead of VK_SUCCESS, to indicate that not all the
|
||||
* available physical devices were returned.
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices(
		VkInstance instance,
		uint32_t* pPhysicalDeviceCount,
		VkPhysicalDevice* pPhysicalDevices)
{
	//Reports the physical devices of this instance (at most one).
	//Returns VK_INCOMPLETE only if the caller's array was too small to hold all of them.
	assert(instance);
	assert(pPhysicalDeviceCount);

	//TODO is there a way to check if there's a gpu (and it's the rPi)?
	//for now: the card node exists -> exactly one GPU
	const uint32_t numGPUs = (access( "/dev/dri/card0", F_OK ) != -1) ? 1u : 0u;

	if(!pPhysicalDevices)
	{
		//count query
		*pPhysicalDeviceCount = numGPUs;
		return VK_SUCCESS;
	}

	//counts stay unsigned: a huge caller supplied count must not turn negative
	const uint32_t arraySize = *pPhysicalDeviceCount;
	const uint32_t elementsWritten = numGPUs < arraySize ? numGPUs : arraySize;

	for(uint32_t c = 0; c < elementsWritten; ++c)
	{
		pPhysicalDevices[c] = &instance->dev;
	}

	*pPhysicalDeviceCount = elementsWritten;

	//incomplete means "fewer written than available", not "array larger than needed"
	return elementsWritten < numGPUs ? VK_INCOMPLETE : VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceProperties
|
||||
*/
|
||||
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties(
		VkPhysicalDevice physicalDevice,
		VkPhysicalDeviceProperties* pProperties)
{
	//Fills *pProperties with the fixed identification, limits and sparse properties of the device.
	assert(physicalDevice);
	assert(pProperties);

	VkPhysicalDeviceSparseProperties sparseProps =
	{
		.residencyStandard2DBlockShape = 1,
		.residencyStandard2DMultisampleBlockShape = 1,
		.residencyStandard3DBlockShape = 1,
		.residencyAlignedMipSize = 1,
		.residencyNonResidentStrict = 1
	};

	pProperties->apiVersion = VK_MAKE_VERSION(1,1,0);
	pProperties->driverVersion = 1; //we'll simply call this v1
	pProperties->vendorID = 0x14E4; //Broadcom
	pProperties->deviceID = 0; //TODO dunno?
	pProperties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
	strcpy(pProperties->deviceName, "VideoCore IV HW");
	//TODO provide a real UUID; zeroed for now so the caller never reads uninitialized bytes
	memset(pProperties->pipelineCacheUUID, 0, sizeof(pProperties->pipelineCacheUUID));
	pProperties->limits = _limits;
	pProperties->sparseProperties = sparseProps;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceFeatures
|
||||
*/
|
||||
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFeatures(
		VkPhysicalDevice physicalDevice,
		VkPhysicalDeviceFeatures* pFeatures)
{
	//Copies the driver's static feature table (_features) into the caller's struct.
	assert(physicalDevice);
	assert(pFeatures);

	memcpy(pFeatures, &_features, sizeof(*pFeatures));
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEnumerateDeviceExtensionProperties
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceExtensionProperties(
		VkPhysicalDevice physicalDevice,
		const char* pLayerName,
		uint32_t* pPropertyCount,
		VkExtensionProperties* pProperties)
{
	//Reports the supported device extensions.
	//pProperties == NULL: only the count is returned (VK_SUCCESS).
	//Otherwise at most *pPropertyCount entries are written, *pPropertyCount is set to the number
	//written, and VK_INCOMPLETE is returned if not all extensions fit.
	assert(physicalDevice);
	assert(!pLayerName); //layers ignored for now
	assert(pPropertyCount);

	const uint32_t available = numDeviceExtensions;

	if(!pProperties)
	{
		*pPropertyCount = available;
		return VK_SUCCESS; //a count query is a complete answer
	}

	const uint32_t arraySize = *pPropertyCount;
	const uint32_t elementsWritten = available < arraySize ? available : arraySize;

	for(uint32_t c = 0; c < elementsWritten; ++c)
	{
		pProperties[c] = deviceExtensions[c];
	}

	*pPropertyCount = elementsWritten;

	return elementsWritten < available ? VK_INCOMPLETE : VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceQueueFamilyProperties
|
||||
* If pQueueFamilyProperties is NULL, then the number of queue families available is returned in pQueueFamilyPropertyCount.
|
||||
* Otherwise, pQueueFamilyPropertyCount must point to a variable set by the user to the number of elements in the pQueueFamilyProperties array,
|
||||
* and on return the variable is overwritten with the number of structures actually written to pQueueFamilyProperties. If pQueueFamilyPropertyCount
|
||||
* is less than the number of queue families available, at most pQueueFamilyPropertyCount structures will be written.
|
||||
*/
|
||||
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyProperties(
		VkPhysicalDevice physicalDevice,
		uint32_t* pQueueFamilyPropertyCount,
		VkQueueFamilyProperties* pQueueFamilyProperties)
{
	//Reports the queue families from _queueFamilyProperties.
	//pQueueFamilyProperties == NULL: only the count is returned.
	//Otherwise at most *pQueueFamilyPropertyCount entries are written and the count is updated.
	assert(physicalDevice);
	assert(pQueueFamilyPropertyCount);

	const uint32_t available = numQueueFamilies;

	if(!pQueueFamilyProperties)
	{
		//report the real table size rather than a hard coded 1
		*pQueueFamilyPropertyCount = available;
		return;
	}

	const uint32_t arraySize = *pQueueFamilyPropertyCount;
	const uint32_t elementsWritten = available < arraySize ? available : arraySize;

	for(uint32_t c = 0; c < elementsWritten; ++c)
	{
		pQueueFamilyProperties[c] = _queueFamilyProperties[c];
	}

	*pQueueFamilyPropertyCount = elementsWritten;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceSupportKHR
|
||||
* does this queue family support presentation to this surface?
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR(
		VkPhysicalDevice physicalDevice,
		uint32_t queueFamilyIndex,
		VkSurfaceKHR surface,
		VkBool32* pSupported)
{
	//Every queue family is currently reported as able to present to every surface.
	assert(physicalDevice);
	assert(surface);
	assert(pSupported);

	assert(queueFamilyIndex < numQueueFamilies);

	//TODO if we plan to support headless rendering, there should be 2 families
	//one using /dev/dri/card0 which has modesetting
	//other using /dev/dri/renderD128 which does not support modesetting, this would say false here
	pSupported[0] = VK_TRUE;

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateDevice
|
||||
* vkCreateDevice verifies that extensions and features requested in the ppEnabledExtensionNames and pEnabledFeatures
|
||||
* members of pCreateInfo, respectively, are supported by the implementation. If any requested extension is not supported,
|
||||
* vkCreateDevice must return VK_ERROR_EXTENSION_NOT_PRESENT. If any requested feature is not supported, vkCreateDevice must return
|
||||
* VK_ERROR_FEATURE_NOT_PRESENT. Support for extensions can be checked before creating a device by querying vkEnumerateDeviceExtensionProperties
|
||||
* After verifying and enabling the extensions the VkDevice object is created and returned to the application.
|
||||
* If a requested extension is only supported by a layer, both the layer and the extension need to be specified at vkCreateInstance
|
||||
* time for the creation to succeed. Multiple logical devices can be created from the same physical device. Logical device creation may
|
||||
* fail due to lack of device-specific resources (in addition to the other errors). If that occurs, vkCreateDevice will return VK_ERROR_TOO_MANY_OBJECTS.
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice(
		VkPhysicalDevice physicalDevice,
		const VkDeviceCreateInfo* pCreateInfo,
		const VkAllocationCallbacks* pAllocator,
		VkDevice* pDevice)
{
	//Creates a logical device: records the requested extensions and features and allocates the
	//requested queues.
	//Returns VK_ERROR_OUT_OF_HOST_MEMORY if an allocation fails, VK_ERROR_EXTENSION_NOT_PRESENT /
	//VK_ERROR_FEATURE_NOT_PRESENT if something unsupported is requested. On any failure everything
	//allocated so far is released and *pDevice is set to 0.
	assert(physicalDevice);
	assert(pDevice);
	assert(pCreateInfo);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	VkResult result = VK_SUCCESS;

	//calloc: numEnabledExtensions, enabledFeatures, queues[] and numQueues[] all start out zeroed
	_device* dev = calloc(1, sizeof(_device));

	if(!dev)
	{
		*pDevice = 0;
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	dev->dev = physicalDevice;

	for(uint32_t c = 0; c < pCreateInfo->enabledExtensionCount; ++c)
	{
		//findDeviceExtension only reads the name, it just lacks the const qualifier
		int findres = findDeviceExtension((char*)pCreateInfo->ppEnabledExtensionNames[c]);

		if(findres < 0)
		{
			result = VK_ERROR_EXTENSION_NOT_PRESENT;
			goto fail;
		}

		//a name listed more than once must not overflow the table
		assert(dev->numEnabledExtensions < numDeviceExtensions);

		dev->enabledExtensions[dev->numEnabledExtensions] = findres;
		dev->numEnabledExtensions++;
	}

	if(pCreateInfo->pEnabledFeatures)
	{
		//VkPhysicalDeviceFeatures consists only of VkBool32 members, so it is walked as a flat array
		const VkBool32* requestedFeatures = (const VkBool32*)pCreateInfo->pEnabledFeatures;
		const VkBool32* supportedFeatures = (const VkBool32*)&_features;

		for(int c = 0; c < numFeatures; ++c)
		{
			if(requestedFeatures[c] && !supportedFeatures[c])
			{
				result = VK_ERROR_FEATURE_NOT_PRESENT;
				goto fail;
			}
		}

		dev->enabledFeatures = *pCreateInfo->pEnabledFeatures;
	}
	//else: everything stays disabled (zeroed by calloc)

	//layers ignored per spec
	//pCreateInfo->enabledLayerCount

	for(uint32_t c = 0; c < pCreateInfo->queueCreateInfoCount; ++c)
	{
		const VkDeviceQueueCreateInfo* info = &pCreateInfo->pQueueCreateInfos[c];

		assert(info->queueFamilyIndex < numQueueFamilies);

		//a family must only be listed once; releasing a previous array keeps a repeat from leaking
		free(dev->queues[info->queueFamilyIndex]);

		//calloc zeroes lastEmitSeqno of every queue
		dev->queues[info->queueFamilyIndex] = calloc(info->queueCount, sizeof(_queue));

		if(!dev->queues[info->queueFamilyIndex])
		{
			dev->numQueues[info->queueFamilyIndex] = 0;
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		dev->numQueues[info->queueFamilyIndex] = (int)info->queueCount;
	}

	*pDevice = dev;
	return VK_SUCCESS;

fail:
	for(int c = 0; c < numQueueFamilies; ++c)
	{
		free(dev->queues[c]);
	}

	free(dev);
	*pDevice = 0;

	return result;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetDeviceQueue
|
||||
* vkGetDeviceQueue must only be used to get queues that were created with the flags parameter of VkDeviceQueueCreateInfo set to zero.
|
||||
* To get queues that were created with a non-zero flags parameter use vkGetDeviceQueue2.
|
||||
*/
|
||||
VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue(
		VkDevice device,
		uint32_t queueFamilyIndex,
		uint32_t queueIndex,
		VkQueue* pQueue)
{
	//Hands out a pointer to the queueIndex-th queue of the given family.
	assert(device);
	assert(pQueue);

	assert(queueFamilyIndex < numQueueFamilies);
	assert(queueIndex < device->numQueues[queueFamilyIndex]);

	_queue* familyQueues = device->queues[queueFamilyIndex];

	*pQueue = familyQueues + queueIndex;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyDevice
|
||||
* To ensure that no work is active on the device, vkDeviceWaitIdle can be used to gate the destruction of the device.
|
||||
* Prior to destroying a device, an application is responsible for destroying/freeing any Vulkan objects that were created using that device as the
|
||||
* first parameter of the corresponding vkCreate* or vkAllocate* command
|
||||
*/
|
||||
VKAPI_ATTR void VKAPI_CALL vkDestroyDevice(
		VkDevice device,
		const VkAllocationCallbacks* pAllocator)
{
	//Releases the per family queue arrays and the device object allocated by vkCreateDevice.
	//Destroying a null device is a no-op.

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	if(!device)
	{
		return;
	}

	for(int c = 0; c < numQueueFamilies; ++c)
	{
		//families without queues hold 0, which free() ignores
		free(device->queues[c]);
		device->queues[c] = 0;
	}

	free(device);
}
|
1938
driver/driver.c
1938
driver/driver.c
@ -1,1938 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include "CustomAssert.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
|
||||
#include <vulkan/vulkan.h>
|
||||
#include "vkExt.h"
|
||||
|
||||
#include "modeset.h"
|
||||
#include "kernelInterface.h"
|
||||
#include "ControlListUtil.h"
|
||||
|
||||
#include "AlignedAllocator.h"
|
||||
#include "PoolAllocator.h"
|
||||
#include "ConsecutivePoolAllocator.h"
|
||||
#include "LinearAllocator.h"
|
||||
|
||||
#include "kernel/vc4_packet.h"
|
||||
#include "../brcm/cle/v3d_decoder.h"
|
||||
#include "../brcm/clif/clif_dump.h"
|
||||
|
||||
//min/max helpers. Arguments and the whole expansion are parenthesized so that
//operator precedence at the call site cannot change the meaning (e.g. min(a | b, c)).
//Each argument is still evaluated twice: do not pass expressions with side effects.
#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif

#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
|
||||
|
||||
#include "vkCaps.h"
|
||||
|
||||
//Backing type of the VkPhysicalDevice handle.
//Carries no real state yet; the single member only keeps the struct non-empty.
typedef struct VkPhysicalDevice_T
{
	int dummy; //placeholder (hardware id?)
} _physicalDevice;
|
||||
|
||||
//Backing type of the VkQueue handle.
typedef struct VkQueue_T
{
	//presumably the sequence number of the most recent submission on this queue - TODO confirm
	uint64_t lastEmitSeqno;
} _queue;
|
||||
|
||||
typedef struct VkCommandPool_T
|
||||
{
|
||||
PoolAllocator pa;
|
||||
ConsecutivePoolAllocator cpa;
|
||||
uint32_t queueFamilyIndex;
|
||||
} _commandPool;
|
||||
|
||||
//Lifecycle states of a command buffer.
//CMDBUF_STATE_LAST is not a state, it is the number of states.
typedef enum commandBufferState
{
	CMDBUF_STATE_INITIAL    = 0,
	CMDBUF_STATE_RECORDING  = 1,
	CMDBUF_STATE_EXECUTABLE = 2,
	CMDBUF_STATE_PENDING    = 3,
	CMDBUF_STATE_INVALID    = 4,
	CMDBUF_STATE_LAST       = 5
} commandBufferState;
|
||||
|
||||
typedef struct VkCommandBuffer_T
|
||||
{
|
||||
//Recorded commands include commands to bind pipelines and descriptor sets to the command buffer, commands to modify dynamic state, commands to draw (for graphics rendering),
|
||||
//commands to dispatch (for compute), commands to execute secondary command buffers (for primary command buffers only), commands to copy buffers and images, and other commands
|
||||
|
||||
struct drm_vc4_submit_cl submitCl;
|
||||
|
||||
ControlList binCl;
|
||||
ControlList shaderRecCl;
|
||||
uint32_t shaderRecCount;
|
||||
ControlList uniformsCl;
|
||||
ControlList handlesCl;
|
||||
commandBufferState state;
|
||||
VkCommandBufferUsageFlags usageFlags;
|
||||
_commandPool* cp;
|
||||
} _commandBuffer;
|
||||
|
||||
typedef struct VkInstance_T
|
||||
{
|
||||
//supposedly this should contain all the enabled layers?
|
||||
int enabledExtensions[numInstanceExtensions];
|
||||
int numEnabledExtensions;
|
||||
_physicalDevice dev;
|
||||
int chipVersion;
|
||||
int hasTiling;
|
||||
int hasControlFlow;
|
||||
int hasEtc1;
|
||||
int hasThreadedFs;
|
||||
int hasMadvise;
|
||||
} _instance;
|
||||
|
||||
typedef struct VkDevice_T
|
||||
{
|
||||
int enabledExtensions[numDeviceExtensions];
|
||||
int numEnabledExtensions;
|
||||
VkPhysicalDeviceFeatures enabledFeatures;
|
||||
_physicalDevice* dev;
|
||||
_queue* queues[numQueueFamilies];
|
||||
int numQueues[numQueueFamilies];
|
||||
} _device;
|
||||
|
||||
typedef struct VkSwapchain_T
|
||||
{
|
||||
_image* images;
|
||||
uint32_t numImages;
|
||||
uint32_t backbufferIdx;
|
||||
VkSurfaceKHR surface;
|
||||
} _swapchain;
|
||||
|
||||
//Makes sure the control list can take commandSize more bytes.
//If it cannot, the buffer is reallocated once from the command pool's consecutive
//allocator and the write cursor is re-based onto the new buffer.
void clFit(VkCommandBuffer cb, ControlList* cl, uint32_t commandSize)
{
	if(clHasEnoughSpace(cl, commandSize))
	{
		return; //nothing to do
	}

	//remember how much is in use: the buffer address may change
	uint32_t usedBytes = clSize(cl);

	cl->buffer = consecutivePoolReAllocate(&cb->cp->cpa, cl->buffer, cl->numBlocks);
	assert(cl->buffer);

	cl->nextFreeByte = cl->buffer + usedBytes;
}
|
||||
|
||||
void clDump(void* cl, uint32_t size)
|
||||
{
|
||||
struct v3d_device_info devinfo = {
|
||||
/* While the driver supports V3D 2.1 and 2.6, we haven't split
|
||||
* off a 2.6 XML yet (there are a couple of fields different
|
||||
* in render target formatting)
|
||||
*/
|
||||
.ver = 21,
|
||||
};
|
||||
struct v3d_spec* spec = v3d_spec_load(&devinfo);
|
||||
|
||||
struct clif_dump *clif = clif_dump_init(&devinfo, stderr, true);
|
||||
|
||||
uint32_t offset = 0, hw_offset = 0;
|
||||
uint8_t *p = cl;
|
||||
|
||||
while (offset < size) {
|
||||
struct v3d_group *inst = v3d_spec_find_instruction(spec, p);
|
||||
uint8_t header = *p;
|
||||
uint32_t length;
|
||||
|
||||
if (inst == NULL) {
|
||||
printf("0x%08x 0x%08x: Unknown packet 0x%02x (%d)!\n",
|
||||
offset, hw_offset, header, header);
|
||||
return;
|
||||
}
|
||||
|
||||
length = v3d_group_get_length(inst);
|
||||
|
||||
printf("0x%08x 0x%08x: 0x%02x %s\n",
|
||||
offset, hw_offset, header, v3d_group_get_name(inst));
|
||||
|
||||
v3d_print_group(clif, inst, offset, p);
|
||||
|
||||
switch (header) {
|
||||
case VC4_PACKET_HALT:
|
||||
case VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF:
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
offset += length;
|
||||
if (header != VC4_PACKET_GEM_HANDLES)
|
||||
hw_offset += length;
|
||||
p += length;
|
||||
}
|
||||
|
||||
clif_dump_destroy(clif);
|
||||
}
|
||||
|
||||
//Textures in T format:
|
||||
//formed out of 4KB tiles, which have 1KB subtiles (see page 105 in VC4 arch guide)
|
||||
//1KB subtiles have 512b microtiles.
|
||||
//Width/height of the 512b microtiles is the following:
|
||||
// 64bpp: 2x4
|
||||
// 32bpp: 4x4
|
||||
// 16bpp: 8x4
|
||||
// 8bpp: 8x8
|
||||
// 4bpp: 16x8
|
||||
// 1bpp: 32x16
|
||||
//Therefore width/height of 1KB subtiles is the following:
|
||||
// 64bpp: 8x16
|
||||
// 32bpp: 16x16
|
||||
// 16bpp: 32x16
|
||||
// 8bpp: 32x32
|
||||
// 4bpp: 64x32
|
||||
// 1bpp: 128x64
|
||||
//Finally width/height of the 4KB tiles:
|
||||
// 64bpp: 16x32
|
||||
// 32bpp: 32x32
|
||||
// 16bpp: 64x32
|
||||
// 8bpp: 64x64
|
||||
// 4bpp: 128x64
|
||||
// 1bpp: 256x128
|
||||
//Rounds width/height up to the next multiple of the 4KB T-format tile size for the given bits per pixel.
// width, height: unpadded image dimensions in pixels
// bpp: bits per pixel, one of 64, 32, 16, 8, 4, 1
// paddedWidth, paddedHeight: receive the padded dimensions (must not be null)
//For an unsupported bpp the function asserts; if asserts are compiled out the
//dimensions are returned unpadded instead of dividing by a zero tile size.
void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp, uint32_t* paddedWidth, uint32_t* paddedHeight)
{
	assert(paddedWidth);
	assert(paddedHeight);
	uint32_t tileW = 0;
	uint32_t tileH = 0;

	switch(bpp)
	{
	case 64:
	{
		tileW = 16;
		tileH = 32;
		break;
	}
	case 32:
	{
		tileW = 32;
		tileH = 32;
		break;
	}
	case 16:
	{
		tileW = 64;
		tileH = 32;
		break;
	}
	case 8:
	{
		tileW = 64;
		tileH = 64;
		break;
	}
	case 4:
	{
		tileW = 128;
		tileH = 64;
		break;
	}
	case 1:
	{
		tileW = 256;
		tileH = 128;
		break;
	}
	default:
	{
		assert(0); //unsupported
		//tileW/tileH are still 0 here: bail out before the modulo below
		*paddedWidth = width;
		*paddedHeight = height;
		return;
	}
	}

	//add whatever is missing to reach the next tile boundary (0 if already aligned)
	*paddedWidth = ((tileW - (width % tileW)) % tileW) + width;
	*paddedHeight = ((tileH - (height % tileH)) % tileH) + height;
}
|
||||
|
||||
//Returns the number of bits one pixel of format f occupies in memory.
//VK_FORMAT_R8G8B8_UNORM is reported as 32 because it is stored padded.
//Unsupported formats assert; with asserts compiled out 0 is returned.
uint32_t getFormatBpp(VkFormat f)
{
	switch(f)
	{
	case VK_FORMAT_R16G16B16A16_SFLOAT:
		return 64;
	case VK_FORMAT_R8G8B8_UNORM: //padded to 32
	case VK_FORMAT_R8G8B8A8_UNORM:
		return 32;
	case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
	case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
	case VK_FORMAT_R5G6B5_UNORM_PACK16:
	case VK_FORMAT_R8G8_UNORM:
	case VK_FORMAT_R16_SFLOAT:
	case VK_FORMAT_R16_SINT:
		return 16;
	case VK_FORMAT_R8_UNORM:
	case VK_FORMAT_R8_SINT:
		return 8;
	default:
		assert(0);
		return 0;
	}
}
|
||||
|
||||
//Computes padded dimensions, size and stride of the image, allocates a buffer object
//for it and sets the buffer object's tiling mode.
//Images whose unpadded size exceeds 4096 bytes use T tiling, smaller ones use the linear modifier (LT).
void createImageBO(_image* i)
{
	assert(i);
	assert(i->format);
	assert(i->width);
	assert(i->height);

	const uint32_t bpp = getFormatBpp(i->format);
	const uint32_t bytesPerPixel = bpp / 8;

	//need to pad to T format, as HW automatically chooses that
	const int useTFormat = (i->width * i->height * bytesPerPixel) > 4096;

	i->paddedWidth = i->width;
	i->paddedHeight = i->height;

	if(useTFormat)
	{
		getPaddedTextureDimensionsT(i->width, i->height, bpp, &i->paddedWidth, &i->paddedHeight);
	}

	i->size = i->paddedWidth * i->paddedHeight * bytesPerPixel;
	i->stride = i->paddedWidth * bytesPerPixel;

	i->handle = vc4_bo_alloc(controlFd, i->size, "swapchain image");
	assert(i->handle);

	//set tiling to T if size > 4KB
	int ret;

	if(useTFormat)
	{
		ret = vc4_bo_set_tiling(controlFd, i->handle, DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED);
	}
	else
	{
		ret = vc4_bo_set_tiling(controlFd, i->handle, DRM_FORMAT_MOD_LINEAR);
	}

	assert(ret);

	i->tiling = useTFormat ? VC4_TILING_FORMAT_T : VC4_TILING_FORMAT_LT;
}
|
||||
|
||||
/*static inline void util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc)
|
||||
{
|
||||
ubyte r = 0;
|
||||
ubyte g = 0;
|
||||
ubyte b = 0;
|
||||
ubyte a = 0;
|
||||
|
||||
if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) <= 8) {
|
||||
r = float_to_ubyte(rgba[0]);
|
||||
g = float_to_ubyte(rgba[1]);
|
||||
b = float_to_ubyte(rgba[2]);
|
||||
a = float_to_ubyte(rgba[3]);
|
||||
}
|
||||
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_ABGR8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | a;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_XBGR8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (r << 24) | (g << 16) | (b << 8) | 0xff;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_BGRA8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (a << 24) | (r << 16) | (g << 8) | b;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_BGRX8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (0xffu << 24) | (r << 16) | (g << 8) | b;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_ARGB8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | a;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_XRGB8888_UNORM:
|
||||
{
|
||||
uc->ui[0] = (b << 24) | (g << 16) | (r << 8) | 0xff;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_B5G6R5_UNORM:
|
||||
{
|
||||
uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_B5G5R5X1_UNORM:
|
||||
{
|
||||
uc->us = ((0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_B5G5R5A1_UNORM:
|
||||
{
|
||||
uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_B4G4R4A4_UNORM:
|
||||
{
|
||||
uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_A8_UNORM:
|
||||
{
|
||||
uc->ub = a;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_L8_UNORM:
|
||||
case PIPE_FORMAT_I8_UNORM:
|
||||
{
|
||||
uc->ub = r;
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_R32G32B32A32_FLOAT:
|
||||
{
|
||||
uc->f[0] = rgba[0];
|
||||
uc->f[1] = rgba[1];
|
||||
uc->f[2] = rgba[2];
|
||||
uc->f[3] = rgba[3];
|
||||
}
|
||||
return;
|
||||
case PIPE_FORMAT_R32G32B32_FLOAT:
|
||||
{
|
||||
uc->f[0] = rgba[0];
|
||||
uc->f[1] = rgba[1];
|
||||
uc->f[2] = rgba[2];
|
||||
}
|
||||
return;
|
||||
|
||||
default:
|
||||
util_format_write_4f(format, rgba, 0, uc, 0, 0, 0, 1, 1);
|
||||
}
|
||||
}*/
|
||||
|
||||
//Packs a floating point RGBA colour into one 32 bit word: alpha in bits 31..24,
//blue in 23..16, green in 15..8, red in 7..0.
//Each channel is clamped to [0, 1] (NaN becomes 0) before scaling by 255, because
//converting an out-of-range floating point value to an integer is undefined behaviour.
//In-range values are truncated exactly as before (e.g. 0.5 -> 127).
uint32_t packVec4IntoABGR8(const float rgba[4])
{
	uint32_t channels[4];

	for(int c = 0; c < 4; ++c)
	{
		float v = rgba[c];

		if(!(v > 0.0f)) //negative values and NaN
		{
			v = 0.0f;
		}
		else if(v > 1.0f)
		{
			v = 1.0f;
		}

		channels[c] = (uint32_t)(v * 255.0);
	}

	//shifts are done on uint32_t: shifting a promoted int left by 24 overflows for values >= 128
	return (channels[3] << 24) |
		   (channels[2] << 16) |
		   (channels[1] << 8) |
		   (channels[0] << 0);
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEnumerateInstanceExtensionProperties
|
||||
* When pLayerName parameter is NULL, only extensions provided by the Vulkan implementation or by implicitly enabled layers are returned. When pLayerName is the name of a layer,
|
||||
* the instance extensions provided by that layer are returned.
|
||||
* If pProperties is NULL, then the number of extensions properties available is returned in pPropertyCount. Otherwise, pPropertyCount must point to a variable set by the user
|
||||
* to the number of elements in the pProperties array, and on return the variable is overwritten with the number of structures actually written to pProperties.
|
||||
* If pPropertyCount is less than the number of extension properties available, at most pPropertyCount structures will be written. If pPropertyCount is smaller than the number of extensions available,
|
||||
* VK_INCOMPLETE will be returned instead of VK_SUCCESS, to indicate that not all the available properties were returned.
|
||||
* Because the list of available layers may change externally between calls to vkEnumerateInstanceExtensionProperties,
|
||||
* two calls may retrieve different results if a pLayerName is available in one call but not in another. The extensions supported by a layer may also change between two calls,
|
||||
* e.g. if the layer implementation is replaced by a different version between those calls.
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceExtensionProperties(
		const char* pLayerName,
		uint32_t* pPropertyCount,
		VkExtensionProperties* pProperties)
{
	//Reports the supported instance extensions.
	//pProperties == NULL: only the count is returned (VK_SUCCESS).
	//Otherwise at most *pPropertyCount entries are written, *pPropertyCount is set to the number
	//written, and VK_INCOMPLETE is returned if not all extensions fit.
	assert(!pLayerName); //TODO layers ignored for now
	assert(pPropertyCount);

	const uint32_t available = numInstanceExtensions;

	if(!pProperties)
	{
		*pPropertyCount = available;
		return VK_SUCCESS; //a count query is a complete answer
	}

	const uint32_t arraySize = *pPropertyCount;
	const uint32_t elementsWritten = available < arraySize ? available : arraySize;

	for(uint32_t c = 0; c < elementsWritten; ++c)
	{
		pProperties[c] = instanceExtensions[c];
	}

	*pPropertyCount = elementsWritten;

	return elementsWritten < available ? VK_INCOMPLETE : VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateInstance
|
||||
* There is no global state in Vulkan and all per-application state is stored in a VkInstance object. Creating a VkInstance object initializes the Vulkan library
|
||||
* vkCreateInstance verifies that the requested layers exist. If not, vkCreateInstance will return VK_ERROR_LAYER_NOT_PRESENT. Next vkCreateInstance verifies that
|
||||
* the requested extensions are supported (e.g. in the implementation or in any enabled instance layer) and if any requested extension is not supported,
|
||||
* vkCreateInstance must return VK_ERROR_EXTENSION_NOT_PRESENT. After verifying and enabling the instance layers and extensions the VkInstance object is
|
||||
* created and returned to the application.
|
||||
*/
|
||||
/*
 * Creates the driver instance object, validates the requested instance
 * extensions, opens the control device via openIoctl() and caches the
 * capabilities reported for it.
 *
 * pCreateInfo: creation parameters; layers must not be requested.
 * pAllocator:  must be NULL, custom allocators are not supported yet.
 * pInstance:   receives the new instance, or NULL if creation failed.
 *
 * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY,
 * VK_ERROR_EXTENSION_NOT_PRESENT when an unknown extension is requested, or
 * VK_ERROR_INITIALIZATION_FAILED when openIoctl() reports a failure.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance(
		const VkInstanceCreateInfo*                 pCreateInfo,
		const VkAllocationCallbacks*                pAllocator,
		VkInstance*                                 pInstance)
{
	assert(pInstance);
	assert(pCreateInfo);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	//TODO: possibly we need to load layers here
	//and store them in pInstance
	assert(pCreateInfo->enabledLayerCount == 0);

	if(pCreateInfo->enabledExtensionCount)
	{
		assert(pCreateInfo->ppEnabledExtensionNames);
	}

	*pInstance = malloc(sizeof(_instance));

	if(!*pInstance)
	{
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	(*pInstance)->numEnabledExtensions = 0;

	for(uint32_t c = 0; c < pCreateInfo->enabledExtensionCount; ++c)
	{
		int findres = findInstanceExtension(pCreateInfo->ppEnabledExtensionNames[c]);
		if(findres < 0)
		{
			//do not leak the half initialized instance
			free(*pInstance);
			*pInstance = 0;
			return VK_ERROR_EXTENSION_NOT_PRESENT;
		}

		(*pInstance)->enabledExtensions[(*pInstance)->numEnabledExtensions] = findres;
		(*pInstance)->numEnabledExtensions++;
	}

	//TODO ignored for now
	//pCreateInfo->pApplicationInfo

	int ret = openIoctl(); assert(!ret);
	if(ret)
	{
		//only reachable when asserts are compiled out
		free(*pInstance);
		*pInstance = 0;
		return VK_ERROR_INITIALIZATION_FAILED;
	}

	(*pInstance)->chipVersion = vc4_get_chip_info(controlFd);
	(*pInstance)->hasTiling = vc4_test_tiling(controlFd);

	(*pInstance)->hasControlFlow = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_BRANCHES);
	(*pInstance)->hasEtc1 = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_ETC1);
	(*pInstance)->hasThreadedFs = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
	(*pInstance)->hasMadvise = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_MADVISE);

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#devsandqueues-physical-device-enumeration
|
||||
* If pPhysicalDevices is NULL, then the number of physical devices available is returned in pPhysicalDeviceCount. Otherwise, pPhysicalDeviceCount must point to a
|
||||
* variable set by the user to the number of elements in the pPhysicalDevices array, and on return the variable is overwritten with the number of handles actually
|
||||
* written to pPhysicalDevices. If pPhysicalDeviceCount is less than the number of physical devices available, at most pPhysicalDeviceCount structures will be written.
|
||||
* If pPhysicalDeviceCount is smaller than the number of physical devices available, VK_INCOMPLETE will be returned instead of VK_SUCCESS, to indicate that not all the
|
||||
* available physical devices were returned.
|
||||
*/
|
||||
/*
 * Enumerates the physical devices of an instance. A GPU is reported when
 * /dev/dri/card0 exists; every handle written points at instance->dev.
 *
 * pPhysicalDeviceCount: in: capacity of pPhysicalDevices, out: number of
 *                       handles written (or the number of GPUs when
 *                       pPhysicalDevices is NULL).
 * pPhysicalDevices:     optional output array.
 *
 * Returns VK_INCOMPLETE when the array was too small to hold every device,
 * VK_SUCCESS otherwise.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices(
		VkInstance                                  instance,
		uint32_t*                                   pPhysicalDeviceCount,
		VkPhysicalDevice*                           pPhysicalDevices)
{
	assert(instance);
	assert(pPhysicalDeviceCount);

	//TODO is there a way to check if there's a gpu (and it's the rPi)?
	int gpuExists = access( "/dev/dri/card0", F_OK ) != -1;

	int numGPUs = gpuExists;

	if(!pPhysicalDevices)
	{
		*pPhysicalDeviceCount = numGPUs;
		return VK_SUCCESS;
	}

	int arraySize = *pPhysicalDeviceCount;
	int elementsWritten = min(numGPUs, arraySize);

	for(int c = 0; c < elementsWritten; ++c)
	{
		pPhysicalDevices[c] = &instance->dev;
	}

	*pPhysicalDeviceCount = elementsWritten;

	//incomplete means devices were left out, not that the array had spare room
	if(elementsWritten < numGPUs)
	{
		return VK_INCOMPLETE;
	}

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceProperties
|
||||
*/
|
||||
/*
 * Fills pProperties with the fixed identification of the device plus the
 * driver wide _limits table.
 *
 * physicalDevice: only checked for being non-NULL.
 * pProperties:    output structure, every member is written.
 */
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties(
		VkPhysicalDevice                            physicalDevice,
		VkPhysicalDeviceProperties*                 pProperties)
{
	assert(physicalDevice);
	assert(pProperties);

	VkPhysicalDeviceSparseProperties sparseProps =
	{
		.residencyStandard2DBlockShape = 1,
		.residencyStandard2DMultisampleBlockShape = 1,
		.residencyStandard3DBlockShape = 1,
		.residencyAlignedMipSize = 1,
		.residencyNonResidentStrict = 1
	};

	pProperties->apiVersion = VK_MAKE_VERSION(1,1,0);
	pProperties->driverVersion = 1; //we'll simply call this v1
	pProperties->vendorID = 0x14E4; //Broadcom
	pProperties->deviceID = 0; //TODO dunno?
	pProperties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
	strcpy(pProperties->deviceName, "VideoCore IV HW");

	//TODO no real pipeline cache UUID yet; zero it so that the caller
	//never reads uninitialized bytes from the structure
	memset(pProperties->pipelineCacheUUID, 0, sizeof(pProperties->pipelineCacheUUID));

	pProperties->limits = _limits;
	pProperties->sparseProperties = sparseProps;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceFeatures
|
||||
*/
|
||||
/*
 * Copies the driver wide _features table into the caller's structure.
 *
 * physicalDevice: only checked for being non-NULL.
 * pFeatures:      output structure, overwritten as a whole.
 */
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFeatures(
		VkPhysicalDevice                            physicalDevice,
		VkPhysicalDeviceFeatures*                   pFeatures)
{
	assert(physicalDevice);
	assert(pFeatures);

	//the feature set is the same for every physical device
	VkPhysicalDeviceFeatures* out = pFeatures;
	*out = _features;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEnumerateDeviceExtensionProperties
|
||||
*/
|
||||
/*
 * Reports the device extensions listed in deviceExtensions.
 *
 * physicalDevice: only checked for being non-NULL.
 * pLayerName:     must be NULL, layers are not supported yet.
 * pPropertyCount: in: capacity of pProperties, out: number of entries written
 *                 (or the total number available when pProperties is NULL).
 * pProperties:    optional output array.
 *
 * Returns VK_SUCCESS when everything was reported (a count-only query always
 * is), VK_INCOMPLETE when the array was too small to hold every extension.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceExtensionProperties(
		VkPhysicalDevice                            physicalDevice,
		const char*                                 pLayerName,
		uint32_t*                                   pPropertyCount,
		VkExtensionProperties*                      pProperties)
{
	assert(physicalDevice);
	assert(!pLayerName); //layers ignored for now
	assert(pPropertyCount);

	if(!pProperties)
	{
		//count-only query, nothing was truncated so this is a success
		*pPropertyCount = numDeviceExtensions;
		return VK_SUCCESS;
	}

	int arraySize = *pPropertyCount;
	int elementsWritten = min(numDeviceExtensions, arraySize);

	for(int c = 0; c < elementsWritten; ++c)
	{
		pProperties[c] = deviceExtensions[c];
	}

	*pPropertyCount = elementsWritten;

	//tell the caller when its array could not hold every extension
	if(elementsWritten < numDeviceExtensions)
	{
		return VK_INCOMPLETE;
	}

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceQueueFamilyProperties
|
||||
* If pQueueFamilyProperties is NULL, then the number of queue families available is returned in pQueueFamilyPropertyCount.
|
||||
* Otherwise, pQueueFamilyPropertyCount must point to a variable set by the user to the number of elements in the pQueueFamilyProperties array,
|
||||
* and on return the variable is overwritten with the number of structures actually written to pQueueFamilyProperties. If pQueueFamilyPropertyCount
|
||||
* is less than the number of queue families available, at most pQueueFamilyPropertyCount structures will be written.
|
||||
*/
|
||||
/*
 * Reports the queue families described by _queueFamilyProperties.
 *
 * physicalDevice:            only checked for being non-NULL.
 * pQueueFamilyPropertyCount: in: capacity of pQueueFamilyProperties,
 *                            out: number of entries written (or
 *                            numQueueFamilies when the array is NULL).
 * pQueueFamilyProperties:    optional output array.
 */
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyProperties(
		VkPhysicalDevice                            physicalDevice,
		uint32_t*                                   pQueueFamilyPropertyCount,
		VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
	assert(physicalDevice);
	assert(pQueueFamilyPropertyCount);

	if(!pQueueFamilyProperties)
	{
		//report the same count that the copy path below works with
		*pQueueFamilyPropertyCount = numQueueFamilies;
		return;
	}

	int arraySize = *pQueueFamilyPropertyCount;
	int elementsWritten = min(numQueueFamilies, arraySize);

	for(int c = 0; c < elementsWritten; ++c)
	{
		pQueueFamilyProperties[c] = _queueFamilyProperties[c];
	}

	*pQueueFamilyPropertyCount = elementsWritten;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceSupportKHR
|
||||
* does this queue family support presentation to this surface?
|
||||
*/
|
||||
/*
 * Tells the caller whether a queue family can present to a surface.
 * Every queue family is currently reported as presentation capable.
 *
 * queueFamilyIndex: must be below numQueueFamilies.
 * pSupported:       receives VK_TRUE.
 *
 * Always returns VK_SUCCESS.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR(
		VkPhysicalDevice                            physicalDevice,
		uint32_t                                    queueFamilyIndex,
		VkSurfaceKHR                                surface,
		VkBool32*                                   pSupported)
{
	assert(pSupported);
	assert(surface);
	assert(physicalDevice);

	assert(queueFamilyIndex < numQueueFamilies);

	//TODO if we plan to support headless rendering, there should be 2 families
	//one using /dev/dri/card0 which has modesetting
	//other using /dev/dri/renderD128 which does not support modesetting, this would say false here
	const VkBool32 canPresent = VK_TRUE;
	*pSupported = canPresent;

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* Implementation of our RPI specific "extension"
|
||||
*/
|
||||
VkResult vkCreateRpiSurfaceKHR(
|
||||
VkInstance instance,
|
||||
const VkRpiSurfaceCreateInfoKHR* pCreateInfo,
|
||||
const VkAllocationCallbacks* pAllocator,
|
||||
VkSurfaceKHR* pSurface)
|
||||
{
|
||||
assert(instance);
|
||||
//assert(pCreateInfo); //ignored for now
|
||||
assert(pSurface);
|
||||
//TODO: allocator is ignored for now
|
||||
assert(pAllocator == 0);
|
||||
|
||||
*pSurface = (VkSurfaceKHR)modeset_create(controlFd);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySurfaceKHR
|
||||
* Destroying a VkSurfaceKHR merely severs the connection between Vulkan and the native surface,
|
||||
* and does not imply destroying the native surface, closing a window, or similar behavior
|
||||
* (but we'll do so anyways...)
|
||||
*/
|
||||
/*
 * Destroys a surface by passing it to modeset_destroy().
 *
 * instance:   only checked for being non-NULL.
 * surface:    handle to destroy; VK_NULL_HANDLE is accepted and ignored,
 *             as the Vulkan specification allows.
 * pAllocator: must be NULL, custom allocators are not supported yet.
 */
VKAPI_ATTR void VKAPI_CALL vkDestroySurfaceKHR(
		VkInstance                                  instance,
		VkSurfaceKHR                                surface,
		const VkAllocationCallbacks*                pAllocator)
{
	assert(instance);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	if(!surface)
	{
		//destroying a null handle is a valid no-op
		return;
	}

	modeset_destroy(controlFd, (modeset_dev*)surface);
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateDevice
|
||||
* vkCreateDevice verifies that extensions and features requested in the ppEnabledExtensionNames and pEnabledFeatures
|
||||
* members of pCreateInfo, respectively, are supported by the implementation. If any requested extension is not supported,
|
||||
* vkCreateDevice must return VK_ERROR_EXTENSION_NOT_PRESENT. If any requested feature is not supported, vkCreateDevice must return
|
||||
* VK_ERROR_FEATURE_NOT_PRESENT. Support for extensions can be checked before creating a device by querying vkEnumerateDeviceExtensionProperties
|
||||
* After verifying and enabling the extensions the VkDevice object is created and returned to the application.
|
||||
* If a requested extension is only supported by a layer, both the layer and the extension need to be specified at vkCreateInstance
|
||||
* time for the creation to succeed. Multiple logical devices can be created from the same physical device. Logical device creation may
|
||||
* fail due to lack of device-specific resources (in addition to the other errors). If that occurs, vkCreateDevice will return VK_ERROR_TOO_MANY_OBJECTS.
|
||||
*/
|
||||
/*
 * Creates a logical device: validates the requested device extensions and
 * features, then allocates the queues described by pQueueCreateInfos.
 *
 * physicalDevice: stored in the new device.
 * pCreateInfo:    creation parameters, layers are ignored.
 * pAllocator:     must be NULL, custom allocators are not supported yet.
 * pDevice:        receives the new device, or NULL if creation failed.
 *
 * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY,
 * VK_ERROR_EXTENSION_NOT_PRESENT or VK_ERROR_FEATURE_NOT_PRESENT.
 * Nothing stays allocated when an error is returned.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice(
		VkPhysicalDevice                            physicalDevice,
		const VkDeviceCreateInfo*                   pCreateInfo,
		const VkAllocationCallbacks*                pAllocator,
		VkDevice*                                   pDevice)
{
	assert(physicalDevice);
	assert(pDevice);
	assert(pCreateInfo);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	VkResult res = VK_SUCCESS;

	VkDevice dev = malloc(sizeof(_device));
	if(!dev)
	{
		*pDevice = 0;
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	dev->dev = physicalDevice;
	dev->numEnabledExtensions = 0;

	//start with no queues so that the cleanup path can free unconditionally
	for(int c = 0; c < numQueueFamilies; ++c)
	{
		dev->queues[c] = 0;
		dev->numQueues[c] = 0;
	}

	for(uint32_t c = 0; c < pCreateInfo->enabledExtensionCount; ++c)
	{
		int findres = findDeviceExtension(pCreateInfo->ppEnabledExtensionNames[c]);
		if(findres < 0)
		{
			res = VK_ERROR_EXTENSION_NOT_PRESENT;
			goto fail;
		}

		dev->enabledExtensions[dev->numEnabledExtensions] = findres;
		dev->numEnabledExtensions++;
	}

	//the feature structs are walked as flat arrays of VkBool32
	const VkBool32* requestedFeatures = (const VkBool32*)pCreateInfo->pEnabledFeatures;
	const VkBool32* supportedFeatures = (const VkBool32*)&_features;

	if(requestedFeatures)
	{
		for(int c = 0; c < numFeatures; ++c)
		{
			if(requestedFeatures[c] && !supportedFeatures[c])
			{
				res = VK_ERROR_FEATURE_NOT_PRESENT;
				goto fail;
			}
		}

		dev->enabledFeatures = *pCreateInfo->pEnabledFeatures;
	}
	else
	{
		memset(&dev->enabledFeatures, 0, sizeof(dev->enabledFeatures)); //just disable everything
	}

	//layers ignored per spec
	//pCreateInfo->enabledLayerCount

	for(uint32_t c = 0; c < pCreateInfo->queueCreateInfoCount; ++c)
	{
		const VkDeviceQueueCreateInfo* qci = &pCreateInfo->pQueueCreateInfos[c];

		//an out of range family would write past the end of the queues array
		assert(qci->queueFamilyIndex < numQueueFamilies);

		dev->queues[qci->queueFamilyIndex] = malloc(sizeof(_queue) * qci->queueCount);

		if(!dev->queues[qci->queueFamilyIndex])
		{
			res = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		for(uint32_t d = 0; d < qci->queueCount; ++d)
		{
			dev->queues[qci->queueFamilyIndex][d].lastEmitSeqno = 0;
		}

		dev->numQueues[qci->queueFamilyIndex] = qci->queueCount;
	}

	*pDevice = dev;

	return VK_SUCCESS;

fail:
	//release everything allocated so far
	for(int c = 0; c < numQueueFamilies; ++c)
	{
		free(dev->queues[c]);
	}
	free(dev);
	*pDevice = 0;

	return res;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetDeviceQueue
|
||||
* vkGetDeviceQueue must only be used to get queues that were created with the flags parameter of VkDeviceQueueCreateInfo set to zero.
|
||||
* To get queues that were created with a non-zero flags parameter use vkGetDeviceQueue2.
|
||||
*/
|
||||
/*
 * Looks up a queue that was created together with the device.
 *
 * device:           device that owns the queue.
 * queueFamilyIndex: must be below numQueueFamilies.
 * queueIndex:       must be below device->numQueues[queueFamilyIndex].
 * pQueue:           receives a pointer into the device's queue array.
 */
VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue(
		VkDevice                                    device,
		uint32_t                                    queueFamilyIndex,
		uint32_t                                    queueIndex,
		VkQueue*                                    pQueue)
{
	assert(device);
	assert(pQueue);

	assert(queueFamilyIndex < numQueueFamilies);
	assert(queueIndex < device->numQueues[queueFamilyIndex]);

	//queues of one family are stored contiguously
	*pQueue = device->queues[queueFamilyIndex] + queueIndex;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSemaphore
|
||||
* Semaphores are a synchronization primitive that can be used to insert a dependency between batches submitted to queues.
|
||||
* Semaphores have two states - signaled and unsignaled. The state of a semaphore can be signaled after execution of a batch of commands is completed.
|
||||
* A batch can wait for a semaphore to become signaled before it begins execution, and the semaphore is also unsignaled before the batch begins execution.
|
||||
* As with most objects in Vulkan, semaphores are an interface to internal data which is typically opaque to applications.
|
||||
* This internal data is referred to as a semaphore’s payload. However, in order to enable communication with agents outside of the current device,
|
||||
* it is necessary to be able to export that payload to a commonly understood format, and subsequently import from that format as well.
|
||||
* The internal data of a semaphore may include a reference to any resources and pending work associated with signal or unsignal operations performed on that semaphore object.
|
||||
* Mechanisms to import and export that internal data to and from semaphores are provided below.
|
||||
* These mechanisms indirectly enable applications to share semaphore state between two or more semaphores and other synchronization primitives across process and API boundaries.
|
||||
* When created, the semaphore is in the unsignaled state.
|
||||
*/
|
||||
/*
 * Creates a semaphore backed by a heap allocated POSIX sem_t that starts
 * with a count of zero.
 *
 * device:      only checked for being non-NULL.
 * pCreateInfo: currently unused.
 * pAllocator:  must be NULL, custom allocators are not supported yet.
 * pSemaphore:  receives the handle (the sem_t pointer).
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY when the semaphore
 * could not be allocated or initialized.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore(
		VkDevice                                    device,
		const VkSemaphoreCreateInfo*                pCreateInfo,
		const VkAllocationCallbacks*                pAllocator,
		VkSemaphore*                                pSemaphore)
{
	assert(device);
	assert(pSemaphore);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	//we'll probably just use an IOCTL to wait for a GPU sequence number to complete.
	sem_t* s = malloc(sizeof(sem_t));
	if(!s)
	{
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	//create semaphore unsignalled, shared between threads
	if(sem_init(s, 0, 0) != 0)
	{
		//never hand out a semaphore that was not initialized
		free(s);
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	*pSemaphore = (VkSemaphore)s;

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceCapabilitiesKHR
|
||||
* The capabilities of a swapchain targetting a surface are the intersection of the capabilities of the WSI platform,
|
||||
* the native window or display, and the physical device. The resulting capabilities can be obtained with the queries listed
|
||||
* below in this section. Capabilities that correspond to image creation parameters are not independent of each other:
|
||||
* combinations of parameters that are not supported as reported by vkGetPhysicalDeviceImageFormatProperties are not supported
|
||||
* by the surface on that physical device, even if the capabilities taken individually are supported as part of some other parameter combinations.
|
||||
*
|
||||
* capabilities the specified device supports for a swapchain created for the surface
|
||||
*/
|
||||
/*
 * Describes what a swapchain created for the surface may look like.
 * The current, minimum and maximum extents are all taken from the width and
 * height stored in the surface's modeset_dev; the other members are fixed.
 *
 * physicalDevice:       only checked for being non-NULL.
 * surface:              handle that is interpreted as a modeset_dev pointer.
 * pSurfaceCapabilities: output structure.
 *
 * Always returns VK_SUCCESS.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilitiesKHR(
		VkPhysicalDevice                            physicalDevice,
		VkSurfaceKHR                                surface,
		VkSurfaceCapabilitiesKHR*                   pSurfaceCapabilities)
{
	assert(physicalDevice);
	assert(surface);
	assert(pSurfaceCapabilities);

	modeset_dev* display = (modeset_dev*)surface;
	VkSurfaceCapabilitiesKHR* caps = pSurfaceCapabilities;

	caps->minImageCount = 1; //min 1
	caps->maxImageCount = 2; //TODO max 2 for double buffering for now...

	//only the native resolution is offered
	caps->currentExtent.width = display->width;
	caps->currentExtent.height = display->height;
	caps->minImageExtent.width = display->width; //TODO
	caps->minImageExtent.height = display->height; //TODO
	caps->maxImageExtent.width = display->width; //TODO
	caps->maxImageExtent.height = display->height; //TODO

	caps->maxImageArrayLayers = 1; //TODO maybe more layers for cursor etc.
	caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; //TODO no rotation for now
	caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; //TODO get this from dev
	caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; //TODO no alpha compositing for now
	caps->supportedUsageFlags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; //well we want to draw on the screen right

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceFormatsKHR
|
||||
* If pSurfaceFormats is NULL, then the number of format pairs supported for the given surface is returned in pSurfaceFormatCount.
|
||||
* The number of format pairs supported will be greater than or equal to 1. Otherwise, pSurfaceFormatCount must point to a variable
|
||||
* set by the user to the number of elements in the pSurfaceFormats array, and on return the variable is overwritten with the number
|
||||
* of structures actually written to pSurfaceFormats. If the value of pSurfaceFormatCount is less than the number of format pairs supported,
|
||||
* at most pSurfaceFormatCount structures will be written. If pSurfaceFormatCount is smaller than the number of format pairs supported for the given surface,
|
||||
* VK_INCOMPLETE will be returned instead of VK_SUCCESS to indicate that not all the available values were returned.
|
||||
*/
|
||||
/*
 * Reports the surface formats listed in supportedSurfaceFormats
 * (one entry at the moment).
 *
 * pSurfaceFormatCount: in: capacity of pSurfaceFormats, out: number of
 *                      entries written (or the number available when
 *                      pSurfaceFormats is NULL).
 * pSurfaceFormats:     optional output array.
 *
 * Returns VK_INCOMPLETE when the array was too small, VK_SUCCESS otherwise.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormatsKHR(
		VkPhysicalDevice                            physicalDevice,
		VkSurfaceKHR                                surface,
		uint32_t*                                   pSurfaceFormatCount,
		VkSurfaceFormatKHR*                         pSurfaceFormats)
{
	assert(physicalDevice);
	assert(surface);
	assert(pSurfaceFormatCount);

	const int numFormats = 1;

	if(pSurfaceFormats)
	{
		int capacity = *pSurfaceFormatCount;
		int written = min(numFormats, capacity);

		int idx = 0;
		while(idx < written)
		{
			pSurfaceFormats[idx] = supportedSurfaceFormats[idx];
			++idx;
		}

		*pSurfaceFormatCount = written;

		return written < numFormats ? VK_INCOMPLETE : VK_SUCCESS;
	}

	//count-only query
	*pSurfaceFormatCount = numFormats;
	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfacePresentModesKHR
|
||||
* If pPresentModes is NULL, then the number of presentation modes supported for the given surface is returned in pPresentModeCount.
|
||||
* Otherwise, pPresentModeCount must point to a variable set by the user to the number of elements in the pPresentModes array,
|
||||
* and on return the variable is overwritten with the number of values actually written to pPresentModes.
|
||||
* If the value of pPresentModeCount is less than the number of presentation modes supported, at most pPresentModeCount values will be written.
|
||||
* If pPresentModeCount is smaller than the number of presentation modes supported for the given surface, VK_INCOMPLETE will be returned instead of
|
||||
* VK_SUCCESS to indicate that not all the available values were returned.
|
||||
*/
|
||||
/*
 * Reports the presentation modes of a surface; VK_PRESENT_MODE_FIFO_KHR is
 * the only one offered.
 *
 * pPresentModeCount: in: capacity of pPresentModes, out: number of entries
 *                    written (or the number available when pPresentModes is
 *                    NULL).
 * pPresentModes:     optional output array.
 *
 * Returns VK_INCOMPLETE when the array was too small, VK_SUCCESS otherwise.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR(
		VkPhysicalDevice                            physicalDevice,
		VkSurfaceKHR                                surface,
		uint32_t*                                   pPresentModeCount,
		VkPresentModeKHR*                           pPresentModes)
{
	assert(physicalDevice);
	assert(surface);
	assert(pPresentModeCount);

	const int numModes = 1;

	if(pPresentModes)
	{
		int capacity = *pPresentModeCount;
		int written = min(numModes, capacity);

		int idx = 0;
		while(idx < written)
		{
			//TODO
			pPresentModes[idx] = VK_PRESENT_MODE_FIFO_KHR;
			++idx;
		}

		*pPresentModeCount = written;

		return written < numModes ? VK_INCOMPLETE : VK_SUCCESS;
	}

	//count-only query
	*pPresentModeCount = numModes;
	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSwapchainKHR
|
||||
*/
|
||||
/*
 * Creates a swapchain with pCreateInfo->minImageCount images. Each image is
 * described from the create info, gets a buffer object via createImageBO()
 * and is registered with modeset_create_fb().
 *
 * device:      only checked for being non-NULL.
 * pCreateInfo: swapchain parameters.
 * pAllocator:  must be NULL, custom allocators are not supported yet.
 * pSwapchain:  receives the new swapchain, or 0 if an allocation failed.
 *
 * Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY. All host allocations
 * are made before the first buffer object is created, so an out of memory
 * error leaves nothing behind.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR(
		VkDevice                                    device,
		const VkSwapchainCreateInfoKHR*             pCreateInfo,
		const VkAllocationCallbacks*                pAllocator,
		VkSwapchainKHR*                             pSwapchain)
{
	assert(device);
	assert(pCreateInfo);
	assert(pSwapchain);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	*pSwapchain = malloc(sizeof(_swapchain));
	if(!*pSwapchain)
	{
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	_swapchain* s = *pSwapchain;

	//TODO flags, layers, queue sharing, pretransform, composite alpha, present mode..., clipped, oldswapchain
	//TODO external sync on surface, oldswapchain

	s->images = malloc(sizeof(_image) * pCreateInfo->minImageCount);
	if(!s->images)
	{
		free(s);
		*pSwapchain = 0;
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	s->backbufferIdx = 0;
	s->numImages = pCreateInfo->minImageCount;
	s->surface = pCreateInfo->surface;

	//first pass: describe every image and make all host allocations
	for(uint32_t c = 0; c < pCreateInfo->minImageCount; ++c)
	{
		s->images[c].width = pCreateInfo->imageExtent.width;
		s->images[c].height = pCreateInfo->imageExtent.height;
		s->images[c].depth = 1;
		s->images[c].layers = pCreateInfo->imageArrayLayers;
		s->images[c].miplevels = 1;
		s->images[c].samples = 1; //TODO
		s->images[c].usageBits = pCreateInfo->imageUsage;
		s->images[c].format = pCreateInfo->imageFormat;
		s->images[c].imageSpace = pCreateInfo->imageColorSpace;
		s->images[c].concurrentAccess = pCreateInfo->imageSharingMode;
		s->images[c].numQueueFamiliesWithAccess = pCreateInfo->queueFamilyIndexCount;
		s->images[c].queueFamiliesWithAccess = 0;
		s->images[c].preTransformMode = pCreateInfo->preTransform;
		s->images[c].compositeAlpha = pCreateInfo->compositeAlpha;
		s->images[c].presentMode = pCreateInfo->presentMode;
		s->images[c].clipped = pCreateInfo->clipped;

		if(s->images[c].concurrentAccess && s->images[c].numQueueFamiliesWithAccess > 0)
		{
			s->images[c].queueFamiliesWithAccess = malloc(sizeof(uint32_t)*s->images[c].numQueueFamiliesWithAccess);
			if(!s->images[c].queueFamiliesWithAccess)
			{
				//undo the allocations of the images handled so far
				for(uint32_t d = 0; d < c; ++d)
				{
					free(s->images[d].queueFamiliesWithAccess);
				}
				free(s->images);
				free(s);
				*pSwapchain = 0;
				return VK_ERROR_OUT_OF_HOST_MEMORY;
			}
			memcpy(s->images[c].queueFamiliesWithAccess, pCreateInfo->pQueueFamilyIndices, sizeof(uint32_t)*s->images[c].numQueueFamiliesWithAccess);
		}
	}

	//second pass: create the buffer objects and framebuffers
	for(uint32_t c = 0; c < pCreateInfo->minImageCount; ++c)
	{
		createImageBO(&s->images[c]);
		int res = modeset_create_fb(controlFd, &s->images[c]); assert(res == 0);
	}

	//defer to first swapbuffer (or at least later, getting swapchain != presenting immediately)
	//int res = modeset_fb_for_dev(controlFd, s->surface, &s->images[s->backbufferIdx]); assert(res == 0);

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetSwapchainImagesKHR
|
||||
* If pSwapchainImages is NULL, then the number of presentable images for swapchain is returned in pSwapchainImageCount.
|
||||
* Otherwise, pSwapchainImageCount must point to a variable set by the user to the number of elements in the pSwapchainImages array,
|
||||
* and on return the variable is overwritten with the number of structures actually written to pSwapchainImages.
|
||||
* If the value of pSwapchainImageCount is less than the number of presentable images for swapchain, at most pSwapchainImageCount structures will be written.
|
||||
* If pSwapchainImageCount is smaller than the number of presentable images for swapchain, VK_INCOMPLETE will be returned instead of VK_SUCCESS to
|
||||
* indicate that not all the available values were returned.
|
||||
*/
|
||||
/*
 * Hands out the images owned by a swapchain.
 *
 * device:               only checked for being non-NULL.
 * swapchain:            swapchain whose images are queried.
 * pSwapchainImageCount: in: capacity of pSwapchainImages, out: number of
 *                       handles written (or the image count of the swapchain
 *                       when pSwapchainImages is NULL).
 * pSwapchainImages:     optional output array, receives pointers into the
 *                       swapchain's image array.
 *
 * Returns VK_INCOMPLETE when the array was too small, VK_SUCCESS otherwise.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainImagesKHR(
		VkDevice                                    device,
		VkSwapchainKHR                              swapchain,
		uint32_t*                                   pSwapchainImageCount,
		VkImage*                                    pSwapchainImages)
{
	assert(device);
	assert(swapchain);
	assert(pSwapchainImageCount);

	_swapchain* chain = swapchain;

	if(pSwapchainImages)
	{
		int capacity = *pSwapchainImageCount;
		int written = min(chain->numImages, capacity);

		int idx = 0;
		while(idx < written)
		{
			pSwapchainImages[idx] = &chain->images[idx];
			++idx;
		}

		*pSwapchainImageCount = written;

		return written < chain->numImages ? VK_INCOMPLETE : VK_SUCCESS;
	}

	//count-only query
	*pSwapchainImageCount = chain->numImages;
	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#commandbuffers-pools
|
||||
* Command pools are opaque objects that command buffer memory is allocated from, and which allow the implementation to amortize the
|
||||
* cost of resource creation across multiple command buffers. Command pools are externally synchronized, meaning that a command pool must
|
||||
* not be used concurrently in multiple threads. That includes use via recording commands on any command buffers allocated from the pool,
|
||||
* as well as operations that allocate, free, and reset command buffers or the pool itself.
|
||||
*/
|
||||
/*
 * Creates a command pool consisting of two allocators: a pool allocator
 * with room for 100 _commandBuffer objects and a consecutive pool allocator
 * that manages 100 blocks of ARM_PAGE_SIZE bytes.
 *
 * device:       only checked for being non-NULL.
 * pCreateInfo:  only queueFamilyIndex is used, the flags are ignored.
 * pAllocator:   must be NULL, custom allocators are not supported yet.
 * pCommandPool: receives the handle of the new pool.
 *
 * Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY; nothing stays allocated
 * when an error is returned.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool(
		VkDevice                                    device,
		const VkCommandPoolCreateInfo*              pCreateInfo,
		const VkAllocationCallbacks*                pAllocator,
		VkCommandPool*                              pCommandPool)
{
	assert(device);
	assert(pCreateInfo);
	assert(pCommandPool);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	//VK_COMMAND_POOL_CREATE_TRANSIENT_BIT
	//specifies that command buffers allocated from the pool will be short-lived, meaning that they will be reset or freed in a relatively short timeframe.
	//This flag may be used by the implementation to control memory allocation behavior within the pool.
	//--> definitely use pool allocator

	//VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT
	//allows any command buffer allocated from a pool to be individually reset to the initial state; either by calling vkResetCommandBuffer, or via the implicit reset when calling vkBeginCommandBuffer.
	//If this flag is not set on a pool, then vkResetCommandBuffer must not be called for any command buffer allocated from that pool.

	//TODO pool family ignored for now

	_commandPool* cp = malloc(sizeof(_commandPool));
	if(!cp)
	{
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	cp->queueFamilyIndex = pCreateInfo->queueFamilyIndex;

	//initial number of command buffers to hold
	int numCommandBufs = 100;
	int controlListSize = ARM_PAGE_SIZE * 100;

	//if(pCreateInfo->flags & VK_COMMAND_POOL_CREATE_TRANSIENT_BIT)
	{
		//use pool allocator
		//allocate both backing stores first so that a failure can be undone
		void* pamem = malloc(numCommandBufs * sizeof(_commandBuffer));
		void* cpamem = malloc(controlListSize);

		if(!pamem || !cpamem)
		{
			free(cpamem);
			free(pamem);
			free(cp);
			return VK_ERROR_OUT_OF_HOST_MEMORY;
		}

		cp->pa = createPoolAllocator(pamem, sizeof(_commandBuffer), numCommandBufs * sizeof(_commandBuffer));
		cp->cpa = createConsecutivePoolAllocator(cpamem, ARM_PAGE_SIZE, controlListSize);
	}

	*pCommandPool = (VkCommandPool)cp;

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#commandbuffer-allocation
|
||||
* vkAllocateCommandBuffers can be used to create multiple command buffers. If the creation of any of those command buffers fails,
|
||||
* the implementation must destroy all successfully created command buffer objects from this command, set all entries of the pCommandBuffers array to NULL and return the error.
|
||||
*/
|
||||
/*
 * Allocates pAllocateInfo->commandBufferCount command buffers from the pool named in
 * pAllocateInfo and writes their handles to pCommandBuffers.
 *
 * Each command buffer starts in CMDBUF_STATE_INITIAL and gets one block from the pool's
 * consecutive allocator for each of its four control lists (bin, handles, shader
 * records, uniforms).
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if the pool runs out of command
 * buffer slots or control list blocks. On failure every command buffer created by this
 * call is released again and all entries of pCommandBuffers are set to NULL.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers(
		VkDevice                                    device,
		const VkCommandBufferAllocateInfo*          pAllocateInfo,
		VkCommandBuffer*                            pCommandBuffers)
{
	assert(device);
	assert(pAllocateInfo);
	assert(pCommandBuffers);

	VkResult res = VK_SUCCESS;

	_commandPool* cp = (_commandPool*)pAllocateInfo->commandPool;

	//Number of leading entries of pCommandBuffers that hold a command buffer obtained
	//from the pool by this call. Entries past this were never written (or are NULL)
	//and must not be dereferenced during cleanup.
	uint32_t numAllocated = 0;

	//if(cp->usePoolAllocator)
	{
		for(uint32_t c = 0; c < pAllocateInfo->commandBufferCount; ++c)
		{
			pCommandBuffers[c] = poolAllocate(&cp->pa);

			if(!pCommandBuffers[c])
			{
				res = VK_ERROR_OUT_OF_HOST_MEMORY;
				break;
			}

			numAllocated = c + 1;

			pCommandBuffers[c]->shaderRecCount = 0;
			pCommandBuffers[c]->usageFlags = 0;
			pCommandBuffers[c]->state = CMDBUF_STATE_INITIAL;
			pCommandBuffers[c]->cp = cp;
			clInit(&pCommandBuffers[c]->binCl, consecutivePoolAllocate(&cp->cpa, 1));
			clInit(&pCommandBuffers[c]->handlesCl, consecutivePoolAllocate(&cp->cpa, 1));
			clInit(&pCommandBuffers[c]->shaderRecCl, consecutivePoolAllocate(&cp->cpa, 1));
			clInit(&pCommandBuffers[c]->uniformsCl, consecutivePoolAllocate(&cp->cpa, 1));

			//a null buffer means the consecutive pool had no block left for that list
			if(!pCommandBuffers[c]->binCl.buffer ||
			   !pCommandBuffers[c]->handlesCl.buffer ||
			   !pCommandBuffers[c]->shaderRecCl.buffer ||
			   !pCommandBuffers[c]->uniformsCl.buffer)
			{
				res = VK_ERROR_OUT_OF_HOST_MEMORY;
				break;
			}
		}
	}

	if(res != VK_SUCCESS)
	{
		//if(cp->usePoolAllocator)
		{
			for(uint32_t c = 0; c < numAllocated; ++c)
			{
				VkCommandBuffer cmdbuf = pCommandBuffers[c];

				//Hand back the control list blocks themselves (the pointer held in
				//each list's buffer), each with its own block count. The last
				//command buffer may be only partially set up, so skip null buffers.
				if(cmdbuf->binCl.buffer)
				{
					consecutivePoolFree(&cp->cpa, cmdbuf->binCl.buffer, cmdbuf->binCl.numBlocks);
				}

				if(cmdbuf->handlesCl.buffer)
				{
					consecutivePoolFree(&cp->cpa, cmdbuf->handlesCl.buffer, cmdbuf->handlesCl.numBlocks);
				}

				if(cmdbuf->shaderRecCl.buffer)
				{
					consecutivePoolFree(&cp->cpa, cmdbuf->shaderRecCl.buffer, cmdbuf->shaderRecCl.numBlocks);
				}

				if(cmdbuf->uniformsCl.buffer)
				{
					consecutivePoolFree(&cp->cpa, cmdbuf->uniformsCl.buffer, cmdbuf->uniformsCl.numBlocks);
				}

				poolFree(&cp->pa, cmdbuf);
			}

			//the spec requires every entry of the output array to be NULL on failure
			for(uint32_t c = 0; c < pAllocateInfo->commandBufferCount; ++c)
			{
				pCommandBuffers[c] = 0;
			}
		}
	}

	return res;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkBeginCommandBuffer
|
||||
*/
|
||||
/*
 * Puts a command buffer into the recording state.
 *
 * Stores the usage flags from pBeginInfo, resets the shader record counter and replaces
 * the buffer's submit descriptor with a fresh one. Always returns VK_SUCCESS.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer(
		VkCommandBuffer                             commandBuffer,
		const VkCommandBufferBeginInfo*             pBeginInfo)
{
	assert(commandBuffer);
	assert(pBeginInfo);

	//TODO

	//VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
	//specifies that each recording of the command buffer will only be submitted once, and the command buffer will be reset and recorded again between each submission.

	//VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT
	//specifies that a secondary command buffer is considered to be entirely inside a render pass. If this is a primary command buffer, then this bit is ignored

	//VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT
	//specifies that a command buffer can be resubmitted to a queue while it is in the pending state, and recorded into multiple primary command buffers

	//When a command buffer begins recording, all state in that command buffer is undefined

	//Blank submit descriptor: each surface slot gets hindex = ~0,
	//every member not named here is zero-initialized.
	struct drm_vc4_submit_cl blankSubmit =
	{
		.color_read.hindex = ~0,
		.zs_read.hindex = ~0,
		.color_write.hindex = ~0,
		.msaa_color_write.hindex = ~0,
		.zs_write.hindex = ~0,
		.msaa_zs_write.hindex = ~0,
	};

	commandBuffer->submitCl = blankSubmit;
	commandBuffer->state = CMDBUF_STATE_RECORDING;
	commandBuffer->shaderRecCount = 0;
	commandBuffer->usageFlags = pBeginInfo->flags;

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdPipelineBarrier
|
||||
* vkCmdPipelineBarrier is a synchronization command that inserts a dependency between commands submitted to the same queue, or between commands in the same subpass.
|
||||
* When vkCmdPipelineBarrier is submitted to a queue, it defines a memory dependency between commands that were submitted before it, and those submitted after it.
|
||||
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the first synchronization scope includes all commands that occur earlier in submission order.
|
||||
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the first synchronization scope includes only commands that occur earlier in submission order within the same subpass.
|
||||
* In either case, the first synchronization scope is limited to operations on the pipeline stages determined by the source stage mask specified by srcStageMask.
|
||||
*
|
||||
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the second synchronization scope includes all commands that occur later in submission order.
|
||||
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the second synchronization scope includes only commands that occur later in submission order within the same subpass.
|
||||
* In either case, the second synchronization scope is limited to operations on the pipeline stages determined by the destination stage mask specified by dstStageMask.
|
||||
*
|
||||
* The first access scope is limited to access in the pipeline stages determined by the source stage mask specified by srcStageMask.
|
||||
* Within that, the first access scope only includes the first access scopes defined by elements of the pMemoryBarriers,
|
||||
* pBufferMemoryBarriers and pImageMemoryBarriers arrays, which each define a set of memory barriers. If no memory barriers are specified,
|
||||
* then the first access scope includes no accesses.
|
||||
*
|
||||
* The second access scope is limited to access in the pipeline stages determined by the destination stage mask specified by dstStageMask.
|
||||
* Within that, the second access scope only includes the second access scopes defined by elements of the pMemoryBarriers, pBufferMemoryBarriers and pImageMemoryBarriers arrays,
|
||||
* which each define a set of memory barriers. If no memory barriers are specified, then the second access scope includes no accesses.
|
||||
*
|
||||
* If dependencyFlags includes VK_DEPENDENCY_BY_REGION_BIT, then any dependency between framebuffer-space pipeline stages is framebuffer-local - otherwise it is framebuffer-global.
|
||||
*/
|
||||
/*
 * Records a pipeline barrier.
 *
 * Global and buffer memory barriers are not implemented yet. For each image memory
 * barrier the image's tracked layout is moved to newLayout. In addition, if the barrier
 * waits on transfer writes (srcStageMask has TRANSFER and srcAccessMask has
 * TRANSFER_WRITE) and the image was flagged by vkCmdClearColorImage (needToClear), the
 * binning setup packets are written to the bin control list and the submit descriptor
 * is filled in so that the image gets cleared with its stored clear color.
 *
 * commandBuffer must be non-null; stage, dependency and most access flags are
 * currently ignored (see the TODO lists below).
 */
VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier(
		VkCommandBuffer                             commandBuffer,
		VkPipelineStageFlags                        srcStageMask,
		VkPipelineStageFlags                        dstStageMask,
		VkDependencyFlags                           dependencyFlags,
		uint32_t                                    memoryBarrierCount,
		const VkMemoryBarrier*                      pMemoryBarriers,
		uint32_t                                    bufferMemoryBarrierCount,
		const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
		uint32_t                                    imageMemoryBarrierCount,
		const VkImageMemoryBarrier*                 pImageMemoryBarriers)
{
	assert(commandBuffer);

	//TODO pipeline stage flags
	//VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
	//VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
	//VK_PIPELINE_STAGE_VERTEX_INPUT_BIT
	//VK_PIPELINE_STAGE_VERTEX_SHADER_BIT
	//VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
	//VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT
	//VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT
	//VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
	//VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
	//VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
	//VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
	//VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
	//VK_PIPELINE_STAGE_TRANSFER_BIT
	//VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
	//VK_PIPELINE_STAGE_HOST_BIT
	//VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT
	//VK_PIPELINE_STAGE_ALL_COMMANDS_BIT

	//TODO dependency flags
	//VK_DEPENDENCY_BY_REGION_BIT,
	//VK_DEPENDENCY_DEVICE_GROUP_BIT,
	//VK_DEPENDENCY_VIEW_LOCAL_BIT

	//TODO access flags
	//VK_ACCESS_INDIRECT_COMMAND_READ_BIT
	//VK_ACCESS_INDEX_READ_BIT
	//VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
	//VK_ACCESS_UNIFORM_READ_BIT
	//VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
	//VK_ACCESS_SHADER_READ_BIT
	//VK_ACCESS_SHADER_WRITE_BIT
	//VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
	//VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
	//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
	//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
	//VK_ACCESS_TRANSFER_READ_BIT
	//VK_ACCESS_TRANSFER_WRITE_BIT
	//VK_ACCESS_HOST_READ_BIT
	//VK_ACCESS_HOST_WRITE_BIT
	//VK_ACCESS_MEMORY_READ_BIT
	//VK_ACCESS_MEMORY_WRITE_BIT
	//VK_ACCESS_COMMAND_PROCESS_READ_BIT_NVX
	//VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX

	//TODO Layout transition flags
	//VK_IMAGE_LAYOUT_UNDEFINED
	//VK_IMAGE_LAYOUT_GENERAL
	//VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
	//VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
	//VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
	//VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
	//VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
	//VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
	//VK_IMAGE_LAYOUT_PREINITIALIZED
	//VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
	//VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL
	//VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
	//VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR

	for(uint32_t c = 0; c < memoryBarrierCount; ++c)
	{
		//TODO
	}

	for(uint32_t c = 0; c < bufferMemoryBarrierCount; ++c)
	{
		//TODO
	}

	for(uint32_t c = 0; c < imageMemoryBarrierCount; ++c)
	{
		_image* i = pImageMemoryBarriers[c].image;

		assert(i->layout == pImageMemoryBarriers[c].oldLayout || i->layout == VK_IMAGE_LAYOUT_UNDEFINED);

		if((srcStageMask & VK_PIPELINE_STAGE_TRANSFER_BIT) &&
		   (pImageMemoryBarriers[c].srcAccessMask & VK_ACCESS_TRANSFER_WRITE_BIT) &&
		   i->needToClear)
		{
			//insert CRs to clear the image

			//This must be a comparison: the previous "=" turned the check into an
			//assignment that always passed and only modified the layout in builds
			//where assert is enabled.
			assert(i->layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

			clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length);
			clInsertTileBinningModeConfiguration(&commandBuffer->binCl,
												 0, 0, 0, 0,
												 getFormatBpp(i->format) == 64, //64 bit color mode
												 i->samples > 1, //msaa
												 i->width, i->height, 0, 0, 0);

			//START_TILE_BINNING resets the statechange counters in the hardware,
			//which are what is used when a primitive is binned to a tile to
			//figure out what new state packets need to be written to that tile's
			//command list.
			clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length);
			clInsertStartTileBinning(&commandBuffer->binCl);

			//Reset the current compressed primitives format. This gets modified
			//by VC4_PACKET_GL_INDEXED_PRIMITIVE and
			//VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
			//of every tile.
			clFit(commandBuffer, &commandBuffer->binCl, V3D21_PRIMITIVE_LIST_FORMAT_length);
			clInsertPrimitiveListFormat(&commandBuffer->binCl,
										1, //16 bit
										2); //tris

			//register the image's buffer object and use it as the color write target
			clFit(commandBuffer, &commandBuffer->handlesCl, 4);
			uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, i->handle);
			commandBuffer->submitCl.color_write.hindex = idx;
			commandBuffer->submitCl.color_write.offset = 0;
			commandBuffer->submitCl.color_write.flags = 0;
			//TODO format
			commandBuffer->submitCl.color_write.bits =
					VC4_SET_FIELD(VC4_RENDER_CONFIG_FORMAT_RGBA8888, VC4_RENDER_CONFIG_FORMAT) |
					VC4_SET_FIELD(i->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);

			commandBuffer->submitCl.clear_color[0] = i->clearColor[0];
			commandBuffer->submitCl.clear_color[1] = i->clearColor[1];

			//TODO ranges
			commandBuffer->submitCl.min_x_tile = 0;
			commandBuffer->submitCl.min_y_tile = 0;

			//tiles start out as 64x64, both sides are halved for multisampled
			//images and the height is halved again for 64 bit formats
			uint32_t tileSizeW = 64;
			uint32_t tileSizeH = 64;

			if(i->samples > 1)
			{
				tileSizeW >>= 1;
				tileSizeH >>= 1;
			}

			if(getFormatBpp(i->format) == 64)
			{
				tileSizeH >>= 1;
			}

			uint32_t widthInTiles = divRoundUp(i->width, tileSizeW);
			uint32_t heightInTiles = divRoundUp(i->height, tileSizeH);

			commandBuffer->submitCl.max_x_tile = widthInTiles - 1;
			commandBuffer->submitCl.max_y_tile = heightInTiles - 1;
			commandBuffer->submitCl.width = i->width;
			commandBuffer->submitCl.height = i->height;
			commandBuffer->submitCl.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
			commandBuffer->submitCl.clear_z = 0; //TODO
			commandBuffer->submitCl.clear_s = 0;
		}

		//transition to new layout
		i->layout = pImageMemoryBarriers[c].newLayout;
	}
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdClearColorImage
|
||||
* Color and depth/stencil images can be cleared outside a render pass instance using vkCmdClearColorImage or vkCmdClearDepthStencilImage, respectively.
|
||||
* These commands are only allowed outside of a render pass instance.
|
||||
*/
|
||||
/*
 * Flags a color image for clearing and remembers the clear color.
 *
 * Nothing is written to the command buffer here: the image is only marked
 * (needToClear) and the color from pColor->float32 is packed and stored in both
 * clearColor slots. pRanges / rangeCount are not used yet.
 */
VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage(
		VkCommandBuffer                             commandBuffer,
		VkImage                                     image,
		VkImageLayout                               imageLayout,
		const VkClearColorValue*                    pColor,
		uint32_t                                    rangeCount,
		const VkImageSubresourceRange*              pRanges)
{
	assert(commandBuffer);
	assert(image);
	assert(pColor);

	//TODO this should only flag an image for clearing. This can only be called outside a renderpass
	//actual clearing would only happen:
	// -if image is rendered to (insert clear before first draw call)
	// -if the image is bound for sampling (submit a CL with a clear)
	// -if a command buffer is submitted without any rendering (insert clear)
	// -etc.
	//we shouldn't clear an image if no one uses it

	//TODO ranges support

	//only these three layouts are accepted for the image being cleared
	assert(imageLayout == VK_IMAGE_LAYOUT_GENERAL ||
		   imageLayout == VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR ||
		   imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

	assert(commandBuffer->state == CMDBUF_STATE_RECORDING);

	//the pool's queue family has to offer graphics or compute
	assert(_queueFamilyProperties[commandBuffer->cp->queueFamilyIndex].queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT));

	_image* img = image;

	assert(img->usageBits & VK_IMAGE_USAGE_TRANSFER_DST_BIT);

	//TODO externally sync cmdbuf, cmdpool

	img->needToClear = 1;
	img->clearColor[1] = packVec4IntoABGR8(pColor->float32);
	img->clearColor[0] = img->clearColor[1];
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEndCommandBuffer
|
||||
* If there was an error during recording, the application will be notified by an unsuccessful return code returned by vkEndCommandBuffer.
|
||||
* If the application wishes to further use the command buffer, the command buffer must be reset. The command buffer must have been in the recording state,
|
||||
* and is moved to the executable state.
|
||||
*/
|
||||
/*
 * Finishes recording: terminates the bin control list with an increment-semaphore
 * packet followed by a flush packet and moves the command buffer to the executable
 * state. Always returns VK_SUCCESS.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkEndCommandBuffer(
		VkCommandBuffer                             commandBuffer)
{
	assert(commandBuffer);

	//The semaphore increment tells the render thread that binning is
	//finished so that it can start. It does not take effect before the
	//FLUSH below has completed.
	clFit(commandBuffer, &commandBuffer->binCl, V3D21_INCREMENT_SEMAPHORE_length);
	clInsertIncrementSemaphore(&commandBuffer->binCl);

	//The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN.
	clFit(commandBuffer, &commandBuffer->binCl, V3D21_FLUSH_length);
	clInsertFlush(&commandBuffer->binCl);

	commandBuffer->state = CMDBUF_STATE_EXECUTABLE;

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkAcquireNextImageKHR
|
||||
*/
|
||||
/*
 * Returns the index of the swapchain's current back buffer in *pImageIndex and, if a
 * semaphore was supplied, signals it.
 *
 * At least one of semaphore / fence must be given. timeout is ignored and the fence is
 * not signalled yet (TODO). Always returns VK_SUCCESS.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR(
		VkDevice                                    device,
		VkSwapchainKHR                              swapchain,
		uint64_t                                    timeout,
		VkSemaphore                                 semaphore,
		VkFence                                     fence,
		uint32_t*                                   pImageIndex)
{
	assert(device);
	assert(swapchain);
	assert(pImageIndex);

	assert(semaphore != VK_NULL_HANDLE || fence != VK_NULL_HANDLE);

	//TODO we need to keep track of currently acquired images?

	//TODO wait timeout?

	*pImageIndex = ((_swapchain*)swapchain)->backbufferIdx; //return back buffer index

	//The caller may pass only a fence, in which case there is no semaphore to
	//signal and the handle must not be handed to the sem_* functions.
	if(semaphore != VK_NULL_HANDLE)
	{
		sem_t* s = (sem_t*)semaphore;

		//make sure semaphore is unsignalled
		int semVal = 0;
		sem_getvalue(s, &semVal);
		assert(semVal <= 0);
		(void)semVal; //only read by the assert

		//signal semaphore
		sem_post(s);
	}

	//TODO signal fence

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkQueueSubmit
|
||||
* vkQueueSubmit is a queue submission command, with each batch defined by an element of pSubmits as an instance of the VkSubmitInfo structure.
|
||||
* Batches begin execution in the order they appear in pSubmits, but may complete out of order.
|
||||
* Fence and semaphore operations submitted with vkQueueSubmit have additional ordering constraints compared to other submission commands,
|
||||
* with dependencies involving previous and subsequent queue operations. Information about these additional constraints can be found in the semaphore and
|
||||
* fence sections of the synchronization chapter.
|
||||
* Details on the interaction of pWaitDstStageMask with synchronization are described in the semaphore wait operation section of the synchronization chapter.
|
||||
* The order that batches appear in pSubmits is used to determine submission order, and thus all the implicit ordering guarantees that respect it.
|
||||
* Other than these implicit ordering guarantees and any explicit synchronization primitives, these batches may overlap or otherwise execute out of order.
|
||||
* If any command buffer submitted to this queue is in the executable state, it is moved to the pending state. Once execution of all submissions of a command buffer complete,
|
||||
* it moves from the pending state, back to the executable state. If a command buffer was recorded with the VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT flag,
|
||||
* it instead moves back to the invalid state.
|
||||
* If vkQueueSubmit fails, it may return VK_ERROR_OUT_OF_HOST_MEMORY or VK_ERROR_OUT_OF_DEVICE_MEMORY.
|
||||
* If it does, the implementation must ensure that the state and contents of any resources or synchronization primitives referenced by the submitted command buffers and any semaphores
|
||||
* referenced by pSubmits is unaffected by the call or its failure. If vkQueueSubmit fails in such a way that the implementation is unable to make that guarantee,
|
||||
* the implementation must return VK_ERROR_DEVICE_LOST. See Lost Device.
|
||||
*/
|
||||
/*
 * Submits batches of command buffers to the queue.
 *
 * For each of the submitCount batches in pSubmits, in order:
 *  - waits on the batch's wait semaphores,
 *  - fills in each command buffer's submit descriptor from its control lists, dumps it
 *    to stdout and hands it to the kernel with vc4_cl_submit,
 *  - updates the command buffer states (executable -> pending -> executable, or
 *    invalid for one-time-submit buffers),
 *  - posts the batch's signal semaphores.
 *
 * pWaitDstStageMask and fence are not handled yet (TODO). Always returns VK_SUCCESS.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
		VkQueue                                     queue,
		uint32_t                                    submitCount,
		const VkSubmitInfo*                         pSubmits,
		VkFence                                     fence)
{
	assert(queue);
	assert(submitCount == 0 || pSubmits);

	//Every batch has to be processed, not just the first one, and with
	//submitCount == 0 pSubmits must not be touched at all.
	for(uint32_t b = 0; b < submitCount; ++b)
	{
		const VkSubmitInfo* submit = &pSubmits[b];

		for(uint32_t c = 0; c < submit->waitSemaphoreCount; ++c)
		{
			sem_wait((sem_t*)submit->pWaitSemaphores[c]);
		}

		//TODO: deal with pSubmits->pWaitDstStageMask

		//TODO wait for fence??

		for(uint32_t c = 0; c < submit->commandBufferCount; ++c)
		{
			if(submit->pCommandBuffers[c]->state == CMDBUF_STATE_EXECUTABLE)
			{
				submit->pCommandBuffers[c]->state = CMDBUF_STATE_PENDING;
			}
		}

		for(uint32_t c = 0; c < submit->commandBufferCount; ++c)
		{
			VkCommandBuffer cmdbuf = submit->pCommandBuffers[c];

			//point the submit descriptor at the recorded control lists
			cmdbuf->submitCl.bo_handles = cmdbuf->handlesCl.buffer;
			cmdbuf->submitCl.bo_handle_count = clSize(&cmdbuf->handlesCl) / 4; //handles are 4 bytes each
			cmdbuf->submitCl.bin_cl = cmdbuf->binCl.buffer;
			cmdbuf->submitCl.bin_cl_size = clSize(&cmdbuf->binCl);
			cmdbuf->submitCl.shader_rec = cmdbuf->shaderRecCl.buffer;
			cmdbuf->submitCl.shader_rec_size = clSize(&cmdbuf->shaderRecCl);
			cmdbuf->submitCl.shader_rec_count = cmdbuf->shaderRecCount;
			cmdbuf->submitCl.uniforms = cmdbuf->uniformsCl.buffer;
			cmdbuf->submitCl.uniforms_size = clSize(&cmdbuf->uniformsCl);

			//debug dump of everything that is about to be submitted
			printf("BCL:\n");
			clDump(cmdbuf->submitCl.bin_cl, cmdbuf->submitCl.bin_cl_size);
			printf("BO handles: ");
			for(uint32_t d = 0; d < cmdbuf->submitCl.bo_handle_count; ++d)
			{
				printf("%u ", *((uint32_t*)(cmdbuf->submitCl.bo_handles)+d));
			}
			printf("\nwidth height: %u, %u\n", cmdbuf->submitCl.width, cmdbuf->submitCl.height);
			printf("tile min/max: %u,%u %u,%u\n", cmdbuf->submitCl.min_x_tile, cmdbuf->submitCl.min_y_tile, cmdbuf->submitCl.max_x_tile, cmdbuf->submitCl.max_y_tile);
			printf("color read surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.color_read.hindex, cmdbuf->submitCl.color_read.offset, cmdbuf->submitCl.color_read.bits, cmdbuf->submitCl.color_read.flags);
			printf("color write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.color_write.hindex, cmdbuf->submitCl.color_write.offset, cmdbuf->submitCl.color_write.bits, cmdbuf->submitCl.color_write.flags);
			printf("zs read surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.zs_read.hindex, cmdbuf->submitCl.zs_read.offset, cmdbuf->submitCl.zs_read.bits, cmdbuf->submitCl.zs_read.flags);
			printf("zs write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.zs_write.hindex, cmdbuf->submitCl.zs_write.offset, cmdbuf->submitCl.zs_write.bits, cmdbuf->submitCl.zs_write.flags);
			printf("msaa color write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.msaa_color_write.hindex, cmdbuf->submitCl.msaa_color_write.offset, cmdbuf->submitCl.msaa_color_write.bits, cmdbuf->submitCl.msaa_color_write.flags);
			printf("msaa zs write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.msaa_zs_write.hindex, cmdbuf->submitCl.msaa_zs_write.offset, cmdbuf->submitCl.msaa_zs_write.bits, cmdbuf->submitCl.msaa_zs_write.flags);
			printf("clear color packed rgba %u %u\n", cmdbuf->submitCl.clear_color[0], cmdbuf->submitCl.clear_color[1]);
			printf("clear z %u\n", cmdbuf->submitCl.clear_z);
			printf("clear s %u\n", cmdbuf->submitCl.clear_s);
			printf("flags %u\n", cmdbuf->submitCl.flags);

			//submit ioctl
			static uint64_t lastFinishedSeqno = 0;
			vc4_cl_submit(controlFd, &cmdbuf->submitCl, &queue->lastEmitSeqno, &lastFinishedSeqno);
		}

		for(uint32_t c = 0; c < submit->commandBufferCount; ++c)
		{
			if(submit->pCommandBuffers[c]->state == CMDBUF_STATE_PENDING)
			{
				if(submit->pCommandBuffers[c]->usageFlags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)
				{
					submit->pCommandBuffers[c]->state = CMDBUF_STATE_INVALID;
				}
				else
				{
					submit->pCommandBuffers[c]->state = CMDBUF_STATE_EXECUTABLE;
				}
			}
		}

		for(uint32_t c = 0; c < submit->signalSemaphoreCount; ++c)
		{
			sem_post((sem_t*)submit->pSignalSemaphores[c]);
		}
	}

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkQueuePresentKHR
|
||||
* Any writes to memory backing the images referenced by the pImageIndices and pSwapchains members of pPresentInfo,
|
||||
* that are available before vkQueuePresentKHR is executed, are automatically made visible to the read access performed by the presentation engine.
|
||||
* This automatic visibility operation for an image happens-after the semaphore signal operation, and happens-before the presentation engine accesses the image.
|
||||
* Queueing an image for presentation defines a set of queue operations, including waiting on the semaphores and submitting a presentation request to the presentation engine.
|
||||
* However, the scope of this set of queue operations does not include the actual processing of the image by the presentation engine.
|
||||
* If vkQueuePresentKHR fails to enqueue the corresponding set of queue operations, it may return VK_ERROR_OUT_OF_HOST_MEMORY or VK_ERROR_OUT_OF_DEVICE_MEMORY.
|
||||
* If it does, the implementation must ensure that the state and contents of any resources or synchronization primitives referenced is unaffected by the call or its failure.
|
||||
* If vkQueuePresentKHR fails in such a way that the implementation is unable to make that guarantee, the implementation must return VK_ERROR_DEVICE_LOST.
|
||||
* However, if the presentation request is rejected by the presentation engine with an error VK_ERROR_OUT_OF_DATE_KHR or VK_ERROR_SURFACE_LOST_KHR,
|
||||
* the set of queue operations are still considered to be enqueued and thus any semaphore to be waited on gets unsignaled when the corresponding queue operation is complete.
|
||||
*/
|
||||
/*
 * Presents the current back buffer of every swapchain listed in pPresentInfo.
 *
 * Blocks on each wait semaphore first, then for each swapchain shows
 * images[backbufferIdx] via modeset_present_buffer and advances backbufferIdx
 * (wrapping at numImages). Always returns VK_SUCCESS.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR(
		VkQueue                                     queue,
		const VkPresentInfoKHR*                     pPresentInfo)
{
	assert(queue);
	assert(pPresentInfo);

	//wait for semaphore in present info set by submit ioctl to make sure cls are flushed
	for(int w = 0; w < pPresentInfo->waitSemaphoreCount; ++w)
	{
		sem_t* waitSem = (sem_t*)pPresentInfo->pWaitSemaphores[w];
		sem_wait(waitSem);
	}

	for(int n = 0; n < pPresentInfo->swapchainCount; ++n)
	{
		_swapchain* sc = pPresentInfo->pSwapchains[n];

		//show the current back buffer...
		modeset_present_buffer(controlFd, (modeset_dev*)sc->surface, &sc->images[sc->backbufferIdx]);

		//...and move on to the next image, wrapping around at the end
		sc->backbufferIdx = (sc->backbufferIdx + 1) % sc->numImages;
	}

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDeviceWaitIdle
|
||||
* vkDeviceWaitIdle is equivalent to calling vkQueueWaitIdle for all queues owned by device.
|
||||
*/
|
||||
/*
 * Waits, without timeout, for the last sequence number emitted on every queue of
 * every queue family of the device. Always returns VK_SUCCESS.
 */
VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle(
		VkDevice                                    device)
{
	assert(device);

	for(int c = 0; c < numQueueFamilies; ++c)
	{
		for(int d = 0; d < device->numQueues[c]; ++d)
		{
			//Passed by pointer, so it has to hold a defined value in case the
			//callee reads it. 0 matches the initial value vkQueueSubmit uses for
			//its own last-finished tracker.
			uint64_t lastFinishedSeqno = 0;
			vc4_seqno_wait(controlFd, &lastFinishedSeqno, device->queues[c][d].lastEmitSeqno, WAIT_TIMEOUT_INFINITE);
		}
	}

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkFreeCommandBuffers
|
||||
* Any primary command buffer that is in the recording or executable state and has any element of pCommandBuffers recorded into it, becomes invalid.
|
||||
*/
|
||||
/*
 * Returns command buffers to the pool they were allocated from.
 *
 * For every non-null entry of pCommandBuffers the four control list allocations are
 * handed back to the pool's consecutive allocator and the command buffer object itself
 * to the pool allocator. Null entries are skipped.
 */
VKAPI_ATTR void VKAPI_CALL vkFreeCommandBuffers(
		VkDevice                                    device,
		VkCommandPool                               commandPool,
		uint32_t                                    commandBufferCount,
		const VkCommandBuffer*                      pCommandBuffers)
{
	assert(device);
	assert(commandPool);
	assert(pCommandBuffers);

	_commandPool* cp = (_commandPool*)commandPool;

	for(uint32_t c = 0; c < commandBufferCount; ++c)
	{
		VkCommandBuffer cmdbuf = pCommandBuffers[c];

		//array elements are allowed to be NULL
		if(!cmdbuf)
		{
			continue;
		}

		//if(cp->usePoolAllocator)
		{
			//Free the memory the control list points to (its buffer), not the
			//control list struct embedded in the command buffer, and use each
			//list's own block count since the lists can differ in size.
			consecutivePoolFree(&cp->cpa, cmdbuf->binCl.buffer, cmdbuf->binCl.numBlocks);
			consecutivePoolFree(&cp->cpa, cmdbuf->handlesCl.buffer, cmdbuf->handlesCl.numBlocks);
			consecutivePoolFree(&cp->cpa, cmdbuf->shaderRecCl.buffer, cmdbuf->shaderRecCl.numBlocks);
			consecutivePoolFree(&cp->cpa, cmdbuf->uniformsCl.buffer, cmdbuf->uniformsCl.numBlocks);
			poolFree(&cp->pa, cmdbuf);
		}
	}
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyCommandPool
|
||||
* When a pool is destroyed, all command buffers allocated from the pool are freed.
|
||||
* Any primary command buffer allocated from another VkCommandPool that is in the recording or executable state and has a secondary command buffer
|
||||
* allocated from commandPool recorded into it, becomes invalid.
|
||||
*/
|
||||
/*
 * Destroys a command pool: releases the two buffers backing its allocators, tears
 * down both allocators and frees the pool object itself.
 * pAllocator must be null (custom allocators are not supported yet).
 */
VKAPI_ATTR void VKAPI_CALL vkDestroyCommandPool(
		VkDevice                                    device,
		VkCommandPool                               commandPool,
		const VkAllocationCallbacks*                pAllocator)
{
	assert(device);
	assert(commandPool);

	//TODO: allocator is ignored for now
	assert(!pAllocator);

	_commandPool* pool = (_commandPool*)commandPool;

	//if(cp->usePoolAllocator)
	{
		//backing memory of the command buffer pool and the control list pool
		free(pool->pa.buf);
		free(pool->cpa.buf);

		destroyPoolAllocator(&pool->pa);
		destroyConsecutivePoolAllocator(&pool->cpa);
	}

	free(pool);
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySemaphore
|
||||
*/
|
||||
/*
 * Destroys a semaphore by calling sem_destroy on the sem_t behind the handle.
 * pAllocator must be null (custom allocators are not supported yet).
 */
VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore(
		VkDevice                                    device,
		VkSemaphore                                 semaphore,
		const VkAllocationCallbacks*                pAllocator)
{
	assert(device);
	assert(semaphore);

	//TODO: allocator is ignored for now
	assert(!pAllocator);

	sem_t* sem = (sem_t*)semaphore;
	sem_destroy(sem);
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySwapchainKHR
|
||||
*/
|
||||
/*
 * Destroys a swapchain: frees the buffer object and the framebuffer of every image,
 * then the image array and the swapchain object.
 * pAllocator must be null (custom allocators are not supported yet).
 */
VKAPI_ATTR void VKAPI_CALL vkDestroySwapchainKHR(
		VkDevice                                    device,
		VkSwapchainKHR                              swapchain,
		const VkAllocationCallbacks*                pAllocator)
{
	assert(device);
	assert(swapchain);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	//TODO flush all ops

	_swapchain* s = swapchain;

	for(int c = 0; c < s->numImages; ++c)
	{
		//release each buffer object with its own size rather than the size of image 0
		vc4_bo_free(controlFd, s->images[c].handle, 0, s->images[c].size);
		modeset_destroy_fb(controlFd, &s->images[c]);
	}

	free(s->images);
	free(s);
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyDevice
|
||||
* To ensure that no work is active on the device, vkDeviceWaitIdle can be used to gate the destruction of the device.
|
||||
* Prior to destroying a device, an application is responsible for destroying/freeing any Vulkan objects that were created using that device as the
|
||||
* first parameter of the corresponding vkCreate* or vkAllocate* command
|
||||
*/
|
||||
/*
 * Placeholder: validates its arguments and releases nothing yet.
 * pAllocator must be null (custom allocators are not supported yet).
 */
VKAPI_ATTR void VKAPI_CALL vkDestroyDevice(
		VkDevice                                    device,
		const VkAllocationCallbacks*                pAllocator)
{
	assert(device);

	//TODO: allocator is ignored for now
	assert(!pAllocator);

	//TODO
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyInstance
|
||||
*
|
||||
*/
|
||||
VKAPI_ATTR void VKAPI_CALL vkDestroyInstance(
		VkInstance                                  instance,
		const VkAllocationCallbacks*                pAllocator)
{
	// Tears down the instance: closes the ioctl connection and releases
	// the instance storage.
	//
	// instance:   instance to destroy, must be non-null
	// pAllocator: must be null, custom allocators are not supported yet
	assert(instance);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	//TODO
	closeIoctl();

	// the instance is allocated with malloc in vkCreateInstance,
	// so it has to be released here to avoid leaking it
	free(instance);
}
|
||||
|
124
driver/instance.c
Normal file
124
driver/instance.c
Normal file
@ -0,0 +1,124 @@
|
||||
#include "common.h"
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEnumerateInstanceExtensionProperties
|
||||
* When pLayerName parameter is NULL, only extensions provided by the Vulkan implementation or by implicitly enabled layers are returned. When pLayerName is the name of a layer,
|
||||
* the instance extensions provided by that layer are returned.
|
||||
* If pProperties is NULL, then the number of extensions properties available is returned in pPropertyCount. Otherwise, pPropertyCount must point to a variable set by the user
|
||||
* to the number of elements in the pProperties array, and on return the variable is overwritten with the number of structures actually written to pProperties.
|
||||
* If pPropertyCount is less than the number of extension properties available, at most pPropertyCount structures will be written. If pPropertyCount is smaller than the number of extensions available,
|
||||
* VK_INCOMPLETE will be returned instead of VK_SUCCESS, to indicate that not all the available properties were returned.
|
||||
* Because the list of available layers may change externally between calls to vkEnumerateInstanceExtensionProperties,
|
||||
* two calls may retrieve different results if a pLayerName is available in one call but not in another. The extensions supported by a layer may also change between two calls,
|
||||
* e.g. if the layer implementation is replaced by a different version between those calls.
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceExtensionProperties(
		const char*                                 pLayerName,
		uint32_t*                                   pPropertyCount,
		VkExtensionProperties*                      pProperties)
{
	// Two-call enumeration of the supported instance extensions.
	//
	// pLayerName:     must be null, layers are not supported yet
	// pPropertyCount: in: capacity of pProperties, out: number of entries
	//                 available (count query) or actually written
	// pProperties:    output array, or null to only query the count
	//
	// Returns VK_SUCCESS, or VK_INCOMPLETE if pProperties was too small to
	// hold every available extension.
	assert(!pLayerName); //TODO layers ignored for now
	assert(pPropertyCount);

	const uint32_t available = (uint32_t)numInstanceExtensions;

	if(!pProperties)
	{
		// a pure count query is a success, not an incomplete enumeration
		*pPropertyCount = available;
		return VK_SUCCESS;
	}

	const uint32_t arraySize = *pPropertyCount;
	const uint32_t elementsWritten = available < arraySize ? available : arraySize;

	for(uint32_t c = 0; c < elementsWritten; ++c)
	{
		pProperties[c] = instanceExtensions[c];
	}

	*pPropertyCount = elementsWritten;

	// tell the caller when its array could not hold every extension
	if(elementsWritten < available)
	{
		return VK_INCOMPLETE;
	}

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateInstance
|
||||
* There is no global state in Vulkan and all per-application state is stored in a VkInstance object. Creating a VkInstance object initializes the Vulkan library
|
||||
* vkCreateInstance verifies that the requested layers exist. If not, vkCreateInstance will return VK_ERROR_LAYER_NOT_PRESENT. Next vkCreateInstance verifies that
|
||||
* the requested extensions are supported (e.g. in the implementation or in any enabled instance layer) and if any requested extension is not supported,
|
||||
* vkCreateInstance must return VK_ERROR_EXTENSION_NOT_PRESENT. After verifying and enabling the instance layers and extensions the VkInstance object is
|
||||
* created and returned to the application.
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance(
		const VkInstanceCreateInfo*                 pCreateInfo,
		const VkAllocationCallbacks*                pAllocator,
		VkInstance*                                 pInstance)
{
	// Allocates the instance, records the requested extensions, opens the
	// ioctl connection and queries the chip capabilities.
	//
	// pCreateInfo: creation parameters, must be non-null
	// pAllocator:  must be null, custom allocators are not supported yet
	// pInstance:   receives the new instance, set to null on failure
	//
	// Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY,
	// VK_ERROR_EXTENSION_NOT_PRESENT or VK_ERROR_INITIALIZATION_FAILED.
	assert(pInstance);
	assert(pCreateInfo);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	//TODO: possibly we need to load layers here
	//and store them in pInstance
	assert(pCreateInfo->enabledLayerCount == 0);

	if(pCreateInfo->enabledExtensionCount)
	{
		assert(pCreateInfo->ppEnabledExtensionNames);
	}

	*pInstance = malloc(sizeof(_instance));

	if(!*pInstance)
	{
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	(*pInstance)->numEnabledExtensions = 0;

	for(uint32_t c = 0; c < pCreateInfo->enabledExtensionCount; ++c)
	{
		int findres = findInstanceExtension(pCreateInfo->ppEnabledExtensionNames[c]);
		if(findres < 0)
		{
			// do not leak the half-initialised instance on failure
			free(*pInstance);
			*pInstance = 0;
			return VK_ERROR_EXTENSION_NOT_PRESENT;
		}

		(*pInstance)->enabledExtensions[(*pInstance)->numEnabledExtensions] = findres;
		(*pInstance)->numEnabledExtensions++;
	}

	//TODO ignored for now
	//pCreateInfo->pApplicationInfo

	int ret = openIoctl();
	assert(!ret);
	if(ret)
	{
		// with asserts compiled out, report the failure instead of
		// querying the chip through an unusable connection
		free(*pInstance);
		*pInstance = 0;
		return VK_ERROR_INITIALIZATION_FAILED;
	}

	(*pInstance)->chipVersion = vc4_get_chip_info(controlFd);
	(*pInstance)->hasTiling = vc4_test_tiling(controlFd);

	(*pInstance)->hasControlFlow = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_BRANCHES);
	(*pInstance)->hasEtc1 = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_ETC1);
	(*pInstance)->hasThreadedFs = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_THREADED_FS);
	(*pInstance)->hasMadvise = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_MADVISE);

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroyInstance
|
||||
*
|
||||
*/
|
||||
VKAPI_ATTR void VKAPI_CALL vkDestroyInstance(
		VkInstance                                  instance,
		const VkAllocationCallbacks*                pAllocator)
{
	// Tears down the instance: closes the ioctl connection and releases
	// the instance storage.
	//
	// instance:   instance to destroy, must be non-null
	// pAllocator: must be null, custom allocators are not supported yet
	assert(instance);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	//TODO
	closeIoctl();

	// the instance is allocated with malloc in vkCreateInstance,
	// so it has to be released here to avoid leaking it
	free(instance);
}
|
273
driver/sync.c
Normal file
273
driver/sync.c
Normal file
@ -0,0 +1,273 @@
|
||||
#include "common.h"
|
||||
|
||||
#include "kernel/vc4_packet.h"
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSemaphore
|
||||
* Semaphores are a synchronization primitive that can be used to insert a dependency between batches submitted to queues.
|
||||
* Semaphores have two states - signaled and unsignaled. The state of a semaphore can be signaled after execution of a batch of commands is completed.
|
||||
* A batch can wait for a semaphore to become signaled before it begins execution, and the semaphore is also unsignaled before the batch begins execution.
|
||||
* As with most objects in Vulkan, semaphores are an interface to internal data which is typically opaque to applications.
|
||||
* This internal data is referred to as a semaphore’s payload. However, in order to enable communication with agents outside of the current device,
|
||||
* it is necessary to be able to export that payload to a commonly understood format, and subsequently import from that format as well.
|
||||
* The internal data of a semaphore may include a reference to any resources and pending work associated with signal or unsignal operations performed on that semaphore object.
|
||||
* Mechanisms to import and export that internal data to and from semaphores are provided below.
|
||||
* These mechanisms indirectly enable applications to share semaphore state between two or more semaphores and other synchronization primitives across process and API boundaries.
|
||||
* When created, the semaphore is in the unsignaled state.
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore(
		VkDevice                                    device,
		const VkSemaphoreCreateInfo*                pCreateInfo,
		const VkAllocationCallbacks*                pAllocator,
		VkSemaphore*                                pSemaphore)
{
	// Creates an unsignalled semaphore backed by a heap allocated POSIX sem_t.
	//
	// device:      owning device, must be non-null
	// pCreateInfo: currently unused
	// pAllocator:  must be null, custom allocators are not supported yet
	// pSemaphore:  receives the new handle, left untouched on failure
	//
	// Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY.
	assert(device);
	assert(pSemaphore);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	//we'll probably just use an IOCTL to wait for a GPU sequence number to complete.
	sem_t* s = malloc(sizeof(sem_t));
	if(!s)
	{
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	//pshared == 0: shared between the threads of this process only
	//value == 0: created unsignalled
	if(sem_init(s, 0, 0) != 0)
	{
		//never hand out a handle to an uninitialised semaphore
		free(s);
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	*pSemaphore = (VkSemaphore)s;

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdPipelineBarrier
|
||||
* vkCmdPipelineBarrier is a synchronization command that inserts a dependency between commands submitted to the same queue, or between commands in the same subpass.
|
||||
* When vkCmdPipelineBarrier is submitted to a queue, it defines a memory dependency between commands that were submitted before it, and those submitted after it.
|
||||
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the first synchronization scope includes all commands that occur earlier in submission order.
|
||||
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the first synchronization scope includes only commands that occur earlier in submission order within the same subpass.
|
||||
* In either case, the first synchronization scope is limited to operations on the pipeline stages determined by the source stage mask specified by srcStageMask.
|
||||
*
|
||||
* If vkCmdPipelineBarrier was recorded outside a render pass instance, the second synchronization scope includes all commands that occur later in submission order.
|
||||
* If vkCmdPipelineBarrier was recorded inside a render pass instance, the second synchronization scope includes only commands that occur later in submission order within the same subpass.
|
||||
* In either case, the second synchronization scope is limited to operations on the pipeline stages determined by the destination stage mask specified by dstStageMask.
|
||||
*
|
||||
* The first access scope is limited to access in the pipeline stages determined by the source stage mask specified by srcStageMask.
|
||||
* Within that, the first access scope only includes the first access scopes defined by elements of the pMemoryBarriers,
|
||||
* pBufferMemoryBarriers and pImageMemoryBarriers arrays, which each define a set of memory barriers. If no memory barriers are specified,
|
||||
* then the first access scope includes no accesses.
|
||||
*
|
||||
* The second access scope is limited to access in the pipeline stages determined by the destination stage mask specified by dstStageMask.
|
||||
* Within that, the second access scope only includes the second access scopes defined by elements of the pMemoryBarriers, pBufferMemoryBarriers and pImageMemoryBarriers arrays,
|
||||
* which each define a set of memory barriers. If no memory barriers are specified, then the second access scope includes no accesses.
|
||||
*
|
||||
* If dependencyFlags includes VK_DEPENDENCY_BY_REGION_BIT, then any dependency between framebuffer-space pipeline stages is framebuffer-local - otherwise it is framebuffer-global.
|
||||
*/
|
||||
VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier(
		VkCommandBuffer                             commandBuffer,
		VkPipelineStageFlags                        srcStageMask,
		VkPipelineStageFlags                        dstStageMask,
		VkDependencyFlags                           dependencyFlags,
		uint32_t                                    memoryBarrierCount,
		const VkMemoryBarrier*                      pMemoryBarriers,
		uint32_t                                    bufferMemoryBarrierCount,
		const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
		uint32_t                                    imageMemoryBarrierCount,
		const VkImageMemoryBarrier*                 pImageMemoryBarriers)
{
	// Records a pipeline barrier. Only image memory barriers have an effect:
	// each one moves its image to the requested layout, and a barrier that
	// follows a transfer write on an image flagged needToClear also emits the
	// binning setup and submit parameters needed to clear that image.
	// Global and buffer memory barriers are accepted but ignored for now.
	assert(commandBuffer);

	//TODO pipeline stage flags
	//VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
	//VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
	//VK_PIPELINE_STAGE_VERTEX_INPUT_BIT
	//VK_PIPELINE_STAGE_VERTEX_SHADER_BIT
	//VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
	//VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT
	//VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT
	//VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
	//VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
	//VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
	//VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
	//VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
	//VK_PIPELINE_STAGE_TRANSFER_BIT
	//VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT
	//VK_PIPELINE_STAGE_HOST_BIT
	//VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT
	//VK_PIPELINE_STAGE_ALL_COMMANDS_BIT

	//TODO dependency flags
	//VK_DEPENDENCY_BY_REGION_BIT,
	//VK_DEPENDENCY_DEVICE_GROUP_BIT,
	//VK_DEPENDENCY_VIEW_LOCAL_BIT

	//TODO access flags
	//VK_ACCESS_INDIRECT_COMMAND_READ_BIT
	//VK_ACCESS_INDEX_READ_BIT
	//VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT
	//VK_ACCESS_UNIFORM_READ_BIT
	//VK_ACCESS_INPUT_ATTACHMENT_READ_BIT
	//VK_ACCESS_SHADER_READ_BIT
	//VK_ACCESS_SHADER_WRITE_BIT
	//VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
	//VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
	//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
	//VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT
	//VK_ACCESS_TRANSFER_READ_BIT
	//VK_ACCESS_TRANSFER_WRITE_BIT
	//VK_ACCESS_HOST_READ_BIT
	//VK_ACCESS_HOST_WRITE_BIT
	//VK_ACCESS_MEMORY_READ_BIT
	//VK_ACCESS_MEMORY_WRITE_BIT
	//VK_ACCESS_COMMAND_PROCESS_READ_BIT_NVX
	//VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX

	//TODO Layout transition flags
	//VK_IMAGE_LAYOUT_UNDEFINED
	//VK_IMAGE_LAYOUT_GENERAL
	//VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
	//VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
	//VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
	//VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
	//VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
	//VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
	//VK_IMAGE_LAYOUT_PREINITIALIZED
	//VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
	//VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL
	//VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
	//VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR

	for(uint32_t c = 0; c < memoryBarrierCount; ++c)
	{
		//TODO
	}

	for(uint32_t c = 0; c < bufferMemoryBarrierCount; ++c)
	{
		//TODO
	}

	for(uint32_t c = 0; c < imageMemoryBarrierCount; ++c)
	{
		_image* i = pImageMemoryBarriers[c].image;

		assert(i->layout == pImageMemoryBarriers[c].oldLayout || i->layout == VK_IMAGE_LAYOUT_UNDEFINED);

		if((srcStageMask & VK_PIPELINE_STAGE_TRANSFER_BIT) &&
		   (pImageMemoryBarriers[c].srcAccessMask & VK_ACCESS_TRANSFER_WRITE_BIT) &&
		   i->needToClear)
		{
			//insert CRs to clear the image

			//this used to be an assignment, which made the check always pass
			//and changed the image layout in builds with asserts enabled only
			//NOTE(review): confirm no caller reaches this with an UNDEFINED layout
			assert(i->layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

			clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length);
			clInsertTileBinningModeConfiguration(&commandBuffer->binCl,
												 0, 0, 0, 0,
												 getFormatBpp(i->format) == 64, //64 bit color mode
												 i->samples > 1, //msaa
												 i->width, i->height, 0, 0, 0);

			//START_TILE_BINNING resets the statechange counters in the hardware,
			//which are what is used when a primitive is binned to a tile to
			//figure out what new state packets need to be written to that tile's
			//command list.
			clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length);
			clInsertStartTileBinning(&commandBuffer->binCl);

			//Reset the current compressed primitives format. This gets modified
			//by VC4_PACKET_GL_INDEXED_PRIMITIVE and
			//VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
			//of every tile.
			clFit(commandBuffer, &commandBuffer->binCl, V3D21_PRIMITIVE_LIST_FORMAT_length);
			clInsertPrimitiveListFormat(&commandBuffer->binCl,
										1, //16 bit
										2); //tris

			clFit(commandBuffer, &commandBuffer->handlesCl, 4);
			uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, i->handle);
			commandBuffer->submitCl.color_write.hindex = idx;
			commandBuffer->submitCl.color_write.offset = 0;
			commandBuffer->submitCl.color_write.flags = 0;
			//TODO format
			commandBuffer->submitCl.color_write.bits =
					VC4_SET_FIELD(VC4_RENDER_CONFIG_FORMAT_RGBA8888, VC4_RENDER_CONFIG_FORMAT) |
					VC4_SET_FIELD(i->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);

			commandBuffer->submitCl.clear_color[0] = i->clearColor[0];
			commandBuffer->submitCl.clear_color[1] = i->clearColor[1];

			//TODO ranges
			commandBuffer->submitCl.min_x_tile = 0;
			commandBuffer->submitCl.min_y_tile = 0;

			//tiles start at 64x64 and are halved below for msaa and 64 bit formats
			uint32_t tileSizeW = 64;
			uint32_t tileSizeH = 64;

			if(i->samples > 1)
			{
				tileSizeW >>= 1;
				tileSizeH >>= 1;
			}

			if(getFormatBpp(i->format) == 64)
			{
				tileSizeH >>= 1;
			}

			uint32_t widthInTiles = divRoundUp(i->width, tileSizeW);
			uint32_t heightInTiles = divRoundUp(i->height, tileSizeH);

			commandBuffer->submitCl.max_x_tile = widthInTiles - 1;
			commandBuffer->submitCl.max_y_tile = heightInTiles - 1;
			commandBuffer->submitCl.width = i->width;
			commandBuffer->submitCl.height = i->height;
			commandBuffer->submitCl.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
			commandBuffer->submitCl.clear_z = 0; //TODO
			commandBuffer->submitCl.clear_s = 0;
		}

		//transition to new layout
		i->layout = pImageMemoryBarriers[c].newLayout;
	}
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDeviceWaitIdle
|
||||
* vkDeviceWaitIdle is equivalent to calling vkQueueWaitIdle for all queues owned by device.
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle(
		VkDevice                                    device)
{
	// Waits, without timeout, for the last emitted sequence number of every
	// queue in every queue family. Always reports VK_SUCCESS.
	assert(device);

	int family = 0;
	while(family < numQueueFamilies)
	{
		int queue = 0;
		while(queue < device->numQueues[family])
		{
			// receives the last finished sequence number, the value is not used
			uint64_t finishedSeqno;
			vc4_seqno_wait(controlFd, &finishedSeqno, device->queues[family][queue].lastEmitSeqno, WAIT_TIMEOUT_INFINITE);
			++queue;
		}
		++family;
	}

	return VK_SUCCESS;
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySemaphore
|
||||
*/
|
||||
VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore(
		VkDevice                                    device,
		VkSemaphore                                 semaphore,
		const VkAllocationCallbacks*                pAllocator)
{
	// Destroys a semaphore handle, which this driver backs with a heap
	// allocated POSIX sem_t.
	//
	// device:     owning device, must be non-null
	// semaphore:  handle to destroy, must be non-null
	// pAllocator: must be null, custom allocators are not supported yet
	assert(device);
	assert(semaphore);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	sem_t* s = (sem_t*)semaphore;

	sem_destroy(s);

	// sem_destroy only tears down the semaphore state, the storage itself
	// still has to be released or every destroyed semaphore leaks
	free(s);
}
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
VkPhysicalDeviceLimits _limits =
|
||||
static VkPhysicalDeviceLimits _limits =
|
||||
{
|
||||
//TODO these values might change
|
||||
.maxImageDimension1D = 16384,
|
||||
@ -113,7 +113,7 @@ VkPhysicalDeviceLimits _limits =
|
||||
.nonCoherentAtomSize = 0x40
|
||||
};
|
||||
|
||||
VkPhysicalDeviceFeatures _features =
|
||||
static VkPhysicalDeviceFeatures _features =
|
||||
{
|
||||
//TODO this might change
|
||||
.robustBufferAccess = 1,
|
||||
@ -174,7 +174,7 @@ VkPhysicalDeviceFeatures _features =
|
||||
};
|
||||
#define numFeatures (sizeof(_features)/sizeof(VkBool32))
|
||||
|
||||
VkQueueFamilyProperties _queueFamilyProperties[] =
|
||||
static VkQueueFamilyProperties _queueFamilyProperties[] =
|
||||
{
|
||||
{
|
||||
.queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
|
||||
@ -185,7 +185,7 @@ VkQueueFamilyProperties _queueFamilyProperties[] =
|
||||
};
|
||||
#define numQueueFamilies (sizeof(_queueFamilyProperties)/sizeof(VkQueueFamilyProperties))
|
||||
|
||||
VkSurfaceFormatKHR supportedSurfaceFormats[] =
|
||||
static VkSurfaceFormatKHR supportedSurfaceFormats[] =
|
||||
{
|
||||
{
|
||||
.format = VK_FORMAT_R8G8B8A8_UNORM,
|
||||
@ -259,29 +259,3 @@ static VkExtensionProperties deviceExtensions[] =
|
||||
}
|
||||
};
|
||||
#define numDeviceExtensions (sizeof(deviceExtensions) / sizeof(VkExtensionProperties))
|
||||
|
||||
int findInstanceExtension(char* name)
|
||||
{
|
||||
for(int c = 0; c < numInstanceExtensions; ++c)
|
||||
{
|
||||
if(strcmp(instanceExtensions[c].extensionName, name) == 0)
|
||||
{
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int findDeviceExtension(char* name)
|
||||
{
|
||||
for(int c = 0; c < numDeviceExtensions; ++c)
|
||||
{
|
||||
if(strcmp(deviceExtensions[c].extensionName, name) == 0)
|
||||
{
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
381
driver/wsi.c
Normal file
381
driver/wsi.c
Normal file
@ -0,0 +1,381 @@
|
||||
#include "common.h"
|
||||
|
||||
/*
|
||||
* Implementation of our RPI specific "extension"
|
||||
*/
|
||||
VkResult vkCreateRpiSurfaceKHR(
|
||||
VkInstance instance,
|
||||
const VkRpiSurfaceCreateInfoKHR* pCreateInfo,
|
||||
const VkAllocationCallbacks* pAllocator,
|
||||
VkSurfaceKHR* pSurface)
|
||||
{
|
||||
assert(instance);
|
||||
//assert(pCreateInfo); //ignored for now
|
||||
assert(pSurface);
|
||||
//TODO: allocator is ignored for now
|
||||
assert(pAllocator == 0);
|
||||
|
||||
*pSurface = (VkSurfaceKHR)modeset_create(controlFd);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySurfaceKHR
|
||||
* Destroying a VkSurfaceKHR merely severs the connection between Vulkan and the native surface,
|
||||
* and does not imply destroying the native surface, closing a window, or similar behavior
|
||||
* (but we'll do so anyways...)
|
||||
*/
|
||||
VKAPI_ATTR void VKAPI_CALL vkDestroySurfaceKHR(
		VkInstance                                  instance,
		VkSurfaceKHR                                surface,
		const VkAllocationCallbacks*                pAllocator)
{
	// Hands the surface, interpreted as a modeset_dev, to modeset_destroy.
	assert(instance);
	assert(surface);

	//TODO: allocator is ignored for now
	assert(!pAllocator);

	modeset_dev* dev = (modeset_dev*)surface;
	modeset_destroy(controlFd, dev);
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceCapabilitiesKHR
|
||||
 * The capabilities of a swapchain targeting a surface are the intersection of the capabilities of the WSI platform,
|
||||
* the native window or display, and the physical device. The resulting capabilities can be obtained with the queries listed
|
||||
* below in this section. Capabilities that correspond to image creation parameters are not independent of each other:
|
||||
* combinations of parameters that are not supported as reported by vkGetPhysicalDeviceImageFormatProperties are not supported
|
||||
* by the surface on that physical device, even if the capabilities taken individually are supported as part of some other parameter combinations.
|
||||
*
|
||||
* capabilities the specified device supports for a swapchain created for the surface
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilitiesKHR(
		VkPhysicalDevice                            physicalDevice,
		VkSurfaceKHR                                surface,
		VkSurfaceCapabilitiesKHR*                   pSurfaceCapabilities)
{
	// Fills in fixed capabilities: one or two images, a single array layer,
	// identity transform, opaque compositing and colour attachment usage.
	// The current, minimum and maximum extents are all set to the size
	// stored in the surface's modeset_dev.
	assert(physicalDevice);
	assert(surface);
	assert(pSurfaceCapabilities);

	modeset_dev* dev = (modeset_dev*)surface;
	VkSurfaceCapabilitiesKHR* caps = pSurfaceCapabilities;

	caps->minImageCount = 1; //min 1
	caps->maxImageCount = 2; //TODO max 2 for double buffering for now...

	caps->currentExtent.width = dev->width;
	caps->currentExtent.height = dev->height;
	caps->minImageExtent.width = dev->width; //TODO
	caps->minImageExtent.height = dev->height; //TODO
	caps->maxImageExtent.width = dev->width; //TODO
	caps->maxImageExtent.height = dev->height; //TODO

	caps->maxImageArrayLayers = 1; //TODO maybe more layers for cursor etc.
	caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; //TODO no rotation for now
	caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; //TODO get this from dev
	caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; //TODO no alpha compositing for now
	caps->supportedUsageFlags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; //well we want to draw on the screen right

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfaceFormatsKHR
|
||||
* If pSurfaceFormats is NULL, then the number of format pairs supported for the given surface is returned in pSurfaceFormatCount.
|
||||
* The number of format pairs supported will be greater than or equal to 1. Otherwise, pSurfaceFormatCount must point to a variable
|
||||
* set by the user to the number of elements in the pSurfaceFormats array, and on return the variable is overwritten with the number
|
||||
* of structures actually written to pSurfaceFormats. If the value of pSurfaceFormatCount is less than the number of format pairs supported,
|
||||
* at most pSurfaceFormatCount structures will be written. If pSurfaceFormatCount is smaller than the number of format pairs supported for the given surface,
|
||||
* VK_INCOMPLETE will be returned instead of VK_SUCCESS to indicate that not all the available values were returned.
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormatsKHR(
		VkPhysicalDevice                            physicalDevice,
		VkSurfaceKHR                                surface,
		uint32_t*                                   pSurfaceFormatCount,
		VkSurfaceFormatKHR*                         pSurfaceFormats)
{
	// Two-call enumeration of the surface formats. Exactly one format is
	// reported, taken from the start of supportedSurfaceFormats.
	// Yields VK_INCOMPLETE when the caller's array holds fewer entries.
	assert(physicalDevice);
	assert(surface);
	assert(pSurfaceFormatCount);

	const int numFormats = 1;

	if(pSurfaceFormats == 0)
	{
		// count query only
		*pSurfaceFormatCount = numFormats;
		return VK_SUCCESS;
	}

	int capacity = *pSurfaceFormatCount;
	int written = min(numFormats, capacity);

	int idx = 0;
	while(idx < written)
	{
		pSurfaceFormats[idx] = supportedSurfaceFormats[idx];
		++idx;
	}

	*pSurfaceFormatCount = written;

	return written < numFormats ? VK_INCOMPLETE : VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetPhysicalDeviceSurfacePresentModesKHR
|
||||
* If pPresentModes is NULL, then the number of presentation modes supported for the given surface is returned in pPresentModeCount.
|
||||
* Otherwise, pPresentModeCount must point to a variable set by the user to the number of elements in the pPresentModes array,
|
||||
* and on return the variable is overwritten with the number of values actually written to pPresentModes.
|
||||
* If the value of pPresentModeCount is less than the number of presentation modes supported, at most pPresentModeCount values will be written.
|
||||
* If pPresentModeCount is smaller than the number of presentation modes supported for the given surface, VK_INCOMPLETE will be returned instead of
|
||||
* VK_SUCCESS to indicate that not all the available values were returned.
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR(
		VkPhysicalDevice                            physicalDevice,
		VkSurfaceKHR                                surface,
		uint32_t*                                   pPresentModeCount,
		VkPresentModeKHR*                           pPresentModes)
{
	// Two-call enumeration of the present modes. A single mode,
	// VK_PRESENT_MODE_FIFO_KHR, is reported.
	// Yields VK_INCOMPLETE when the caller's array holds fewer entries.
	assert(physicalDevice);
	assert(surface);
	assert(pPresentModeCount);

	const int numModes = 1;

	if(pPresentModes == 0)
	{
		// count query only
		*pPresentModeCount = numModes;
		return VK_SUCCESS;
	}

	int capacity = *pPresentModeCount;
	int written = min(numModes, capacity);

	int idx = 0;
	while(idx < written)
	{
		//TODO
		pPresentModes[idx] = VK_PRESENT_MODE_FIFO_KHR;
		++idx;
	}

	*pPresentModeCount = written;

	return written < numModes ? VK_INCOMPLETE : VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateSwapchainKHR
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR(
		VkDevice                                    device,
		const VkSwapchainCreateInfoKHR*             pCreateInfo,
		const VkAllocationCallbacks*                pAllocator,
		VkSwapchainKHR*                             pSwapchain)
{
	// Creates a swapchain with pCreateInfo->minImageCount images; each image
	// gets its own buffer object and framebuffer.
	//
	// device:      owning device, must be non-null
	// pCreateInfo: creation parameters, must be non-null
	// pAllocator:  must be null, custom allocators are not supported yet
	// pSwapchain:  receives the new swapchain, set to null on failure
	//
	// Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY.
	assert(device);
	assert(pCreateInfo);
	assert(pSwapchain);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	*pSwapchain = malloc(sizeof(_swapchain));
	if(!*pSwapchain)
	{
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	_swapchain* s = *pSwapchain;

	//TODO flags, layers, queue sharing, pretransform, composite alpha, present mode..., clipped, oldswapchain
	//TODO external sync on surface, oldswapchain

	s->images = malloc(sizeof(_image) * pCreateInfo->minImageCount);
	if(!s->images)
	{
		//do not leak the swapchain struct when the image array cannot be allocated
		free(s);
		*pSwapchain = 0;
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	s->backbufferIdx = 0;
	s->numImages = pCreateInfo->minImageCount;
	s->surface = pCreateInfo->surface;

	for(uint32_t c = 0; c < pCreateInfo->minImageCount; ++c)
	{
		s->images[c].width = pCreateInfo->imageExtent.width;
		s->images[c].height = pCreateInfo->imageExtent.height;
		s->images[c].depth = 1;
		s->images[c].layers = pCreateInfo->imageArrayLayers;
		s->images[c].miplevels = 1;
		s->images[c].samples = 1; //TODO
		s->images[c].usageBits = pCreateInfo->imageUsage;
		s->images[c].format = pCreateInfo->imageFormat;
		s->images[c].imageSpace = pCreateInfo->imageColorSpace;
		s->images[c].concurrentAccess = pCreateInfo->imageSharingMode;
		s->images[c].numQueueFamiliesWithAccess = pCreateInfo->queueFamilyIndexCount;
		if(s->images[c].concurrentAccess)
		{
			size_t bytes = sizeof(uint32_t) * s->images[c].numQueueFamiliesWithAccess;

			s->images[c].queueFamiliesWithAccess = malloc(bytes);
			if(!s->images[c].queueFamiliesWithAccess)
			{
				//unwind the images that were already fully set up,
				//mirroring what vkDestroySwapchainKHR does per image
				for(uint32_t d = 0; d < c; ++d)
				{
					vc4_bo_free(controlFd, s->images[d].handle, 0, s->images[d].size);
					modeset_destroy_fb(controlFd, &s->images[d]);
					free(s->images[d].queueFamiliesWithAccess);
				}

				free(s->images);
				free(s);
				*pSwapchain = 0;
				return VK_ERROR_OUT_OF_HOST_MEMORY;
			}

			memcpy(s->images[c].queueFamiliesWithAccess, pCreateInfo->pQueueFamilyIndices, bytes);
		}
		s->images[c].preTransformMode = pCreateInfo->preTransform;
		s->images[c].compositeAlpha = pCreateInfo->compositeAlpha;
		s->images[c].presentMode = pCreateInfo->presentMode;
		s->images[c].clipped = pCreateInfo->clipped;

		createImageBO(&s->images[c]);
		int res = modeset_create_fb(controlFd, &s->images[c]); assert(res == 0);
	}

	//defer to first swapbuffer (or at least later, getting swapchain != presenting immediately)
	//int res = modeset_fb_for_dev(controlFd, s->surface, &s->images[s->backbufferIdx]); assert(res == 0);

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkGetSwapchainImagesKHR
|
||||
* If pSwapchainImages is NULL, then the number of presentable images for swapchain is returned in pSwapchainImageCount.
|
||||
* Otherwise, pSwapchainImageCount must point to a variable set by the user to the number of elements in the pSwapchainImages array,
|
||||
* and on return the variable is overwritten with the number of structures actually written to pSwapchainImages.
|
||||
* If the value of pSwapchainImageCount is less than the number of presentable images for swapchain, at most pSwapchainImageCount structures will be written.
|
||||
* If pSwapchainImageCount is smaller than the number of presentable images for swapchain, VK_INCOMPLETE will be returned instead of VK_SUCCESS to
|
||||
* indicate that not all the available values were returned.
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainImagesKHR(
		VkDevice device,
		VkSwapchainKHR swapchain,
		uint32_t* pSwapchainImageCount,
		VkImage* pSwapchainImages)
{
	assert(device);
	assert(swapchain);
	assert(pSwapchainImageCount);

	_swapchain* chain = swapchain;

	//count query: no output array was given, so only report how many images the swapchain owns
	if(pSwapchainImages == 0)
	{
		*pSwapchainImageCount = chain->numImages;
		return VK_SUCCESS;
	}

	//never write more handles than the caller's array can hold
	int capacity = *pSwapchainImageCount;
	int written = min(chain->numImages, capacity);

	//hand out the address of each image record as its handle
	int idx = 0;
	while(idx < written)
	{
		pSwapchainImages[idx] = &chain->images[idx];
		++idx;
	}

	//report back how many entries were actually filled in
	*pSwapchainImageCount = written;

	//a short array means the caller did not receive every image
	return written < chain->numImages ? VK_INCOMPLETE : VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkAcquireNextImageKHR
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR(
		VkDevice device,
		VkSwapchainKHR swapchain,
		uint64_t timeout,
		VkSemaphore semaphore,
		VkFence fence,
		uint32_t* pImageIndex)
{
	assert(device);
	assert(swapchain);
	assert(pImageIndex);

	//the caller must supply at least one object to be signalled
	assert(semaphore != VK_NULL_HANDLE || fence != VK_NULL_HANDLE);

	sem_t* s = semaphore;

	//TODO we need to keep track of currently acquired images?

	//TODO wait timeout?

	*pImageIndex = ((_swapchain*)swapchain)->backbufferIdx; //return back buffer index

	//signal semaphore
	//the semaphore is optional when a fence is given, so only touch it if it was actually provided
	if(s)
	{
		int semVal = 0; //stays 0 if sem_getvalue fails, so the assert below never reads garbage
		sem_getvalue(s, &semVal); assert(semVal <= 0); //make sure semaphore is unsignalled
		sem_post(s);
	}

	//TODO signal fence

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkQueuePresentKHR
|
||||
* Any writes to memory backing the images referenced by the pImageIndices and pSwapchains members of pPresentInfo,
|
||||
* that are available before vkQueuePresentKHR is executed, are automatically made visible to the read access performed by the presentation engine.
|
||||
* This automatic visibility operation for an image happens-after the semaphore signal operation, and happens-before the presentation engine accesses the image.
|
||||
* Queueing an image for presentation defines a set of queue operations, including waiting on the semaphores and submitting a presentation request to the presentation engine.
|
||||
* However, the scope of this set of queue operations does not include the actual processing of the image by the presentation engine.
|
||||
* If vkQueuePresentKHR fails to enqueue the corresponding set of queue operations, it may return VK_ERROR_OUT_OF_HOST_MEMORY or VK_ERROR_OUT_OF_DEVICE_MEMORY.
|
||||
* If it does, the implementation must ensure that the state and contents of any resources or synchronization primitives referenced is unaffected by the call or its failure.
|
||||
* If vkQueuePresentKHR fails in such a way that the implementation is unable to make that guarantee, the implementation must return VK_ERROR_DEVICE_LOST.
|
||||
* However, if the presentation request is rejected by the presentation engine with an error VK_ERROR_OUT_OF_DATE_KHR or VK_ERROR_SURFACE_LOST_KHR,
|
||||
* the set of queue operations are still considered to be enqueued and thus any semaphore to be waited on gets unsignaled when the corresponding queue operation is complete.
|
||||
*/
|
||||
VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR(
		VkQueue queue,
		const VkPresentInfoKHR* pPresentInfo)
{
	assert(queue);
	assert(pPresentInfo);

	//wait for semaphore in present info set by submit ioctl to make sure cls are flushed
	for(uint32_t c = 0; c < pPresentInfo->waitSemaphoreCount; ++c)
	{
		sem_wait((sem_t*)pPresentInfo->pWaitSemaphores[c]);
	}

	for(uint32_t c = 0; c < pPresentInfo->swapchainCount; ++c)
	{
		_swapchain* s = pPresentInfo->pSwapchains[c];

		//show the current back buffer, then advance to the next image in the ring
		modeset_present_buffer(controlFd, (modeset_dev*)s->surface, &s->images[s->backbufferIdx]);
		s->backbufferIdx = (s->backbufferIdx + 1) % s->numImages;

		//pResults is optional; when supplied, every swapchain needs its own result written
		//so the application does not read uninitialised memory
		if(pPresentInfo->pResults)
		{
			pPresentInfo->pResults[c] = VK_SUCCESS;
		}
	}

	return VK_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkDestroySwapchainKHR
|
||||
*/
|
||||
VKAPI_ATTR void VKAPI_CALL vkDestroySwapchainKHR(
		VkDevice device,
		VkSwapchainKHR swapchain,
		const VkAllocationCallbacks* pAllocator)
{
	assert(device);
	assert(swapchain);

	//TODO: allocator is ignored for now
	assert(pAllocator == 0);

	//TODO flush all ops

	_swapchain* s = swapchain;

	for(int c = 0; c < s->numImages; ++c)
	{
		//release the buffer object using this image's own size
		vc4_bo_free(controlFd, s->images[c].handle, 0, s->images[c].size);
		modeset_destroy_fb(controlFd, &s->images[c]);

		//the queue family list is only allocated for concurrently shared images;
		//for other images the pointer was never initialised and must not be freed
		if(s->images[c].concurrentAccess)
		{
			free(s->images[c].queueFamiliesWithAccess);
		}
	}

	free(s->images);
	free(s);
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user