1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-02-19 16:54:18 +01:00

started to rework control list submission so that multiple drawcalls work

This commit is contained in:
Unknown 2019-09-01 19:14:47 +01:00
parent 1b7fddc5a0
commit 1e5a5c965a
7 changed files with 212 additions and 51 deletions

View File

@ -13,17 +13,12 @@ uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset)
return (d << offset) & (~(~0 << bits) << offset);
}
uint32_t clSize(ControlList* cl)
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
return cl->nextFreeByte - cl->buffer;
}
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size)
{
uint32_t currSize = clSize(cl);
uint32_t currSize = cl->nextFreeByte - cl->buffer;;
if(currSize + size < CONTROL_LIST_SIZE)
{
return 1; //fits!
@ -41,6 +36,33 @@ void clInit(ControlList* cl, void* buffer)
cl->buffer = buffer;
cl->numBlocks = 1;
cl->nextFreeByte = &cl->buffer[0];
cl->currMarker = 0;
}
/**
 * Writes a new CLMarker header at the control list's write cursor, chains it
 * after the previously inserted marker (if any) and makes it the list's
 * current marker.
 *
 * cl          - control list that receives the marker; its nextFreeByte is
 *               advanced by sizeof(CLMarker)
 * handlesCL   - list whose buffer start is used as the handles region of the
 *               first marker
 * shaderRecCL - list whose buffer start is used as the shader record region
 *               of the first marker
 * uniformsCL  - list whose buffer start is used as the uniforms region of the
 *               first marker
 * imagePtr    - stored untouched in the marker's image field
 *
 * For every marker after the first, each region starts where the previous
 * marker's region ends (previous start + previous size). All sizes, the
 * shader record count and the flags start out as zero; this function never
 * updates them afterwards.
 *
 * NOTE(review): there is no capacity check here; presumably the caller has
 * already reserved sizeof(CLMarker) bytes in cl - confirm.
 * NOTE(review): the region pointers are raw byte pointers, so code that reads
 * them back must not treat marker->handles as a ControlList* - verify readers.
 */
void clInsertNewCLMarker(ControlList* cl, ControlList* handlesCL, ControlList* shaderRecCL, ControlList* uniformsCL, void* imagePtr)
{
	//to be inserted when you'd insert tile binning mode config
	assert(cl);
	assert(cl->nextFreeByte);
	assert(handlesCL);
	assert(shaderRecCL);
	assert(uniformsCL);
	assert(imagePtr);

	CLMarker* prev = cl->currMarker;

	CLMarker marker;
	marker.nextMarker = 0;
	marker.size = 0;
	marker.image = imagePtr;

	//the marker fields are byte pointers, so the first marker is seeded with
	//the start of each list's storage rather than with the list object itself
	marker.handles = prev ? prev->handles + prev->handlesSize : handlesCL->buffer;
	marker.handlesSize = 0;
	marker.shaderRec = prev ? prev->shaderRec + prev->shaderRecSize : shaderRecCL->buffer;
	marker.shaderRecSize = 0;
	marker.shaderRecCount = 0;
	marker.uniforms = prev ? prev->uniforms + prev->uniformsSize : uniformsCL->buffer;
	marker.uniformsSize = 0;
	marker.flags = 0;

	CLMarker* slot = (CLMarker*)cl->nextFreeByte;
	*slot = marker;

	//chain the new marker in, otherwise a walk over nextMarker stops at the
	//first marker and currMarker stays 0 forever
	if(prev)
	{
		prev->nextMarker = slot;
	}
	cl->currMarker = slot;

	cl->nextFreeByte += sizeof(CLMarker);
}
void clInsertData(ControlList* cl, uint32_t size, uint8_t* data)

View File

@ -12,6 +12,21 @@ typedef struct ControlListAddress
uint32_t offset; //offset within buffer object
} ControlListAddress;
//Header that clInsertNewCLMarker copies in-line into a ControlList buffer.
//vkQueueSubmit starts at the first marker of the bin control list, builds one
//drm_vc4_submit_cl per marker and follows nextMarker until it is 0.
//The bytes directly after the header (marker address + sizeof(CLMarker)) are
//what gets submitted as that job's bin control list.
typedef struct CLMarker
{
struct CLMarker* nextMarker; //next marker in the chain, 0 ends the walk
uint32_t size; //in bytes, submitted as bin_cl_size
void* image; //_image* to render to
uint8_t* handles; //submitted as bo_handles
uint32_t handlesSize; //in bytes; divided by 4 to get bo_handle_count
uint8_t* shaderRec; //submitted as shader_rec
uint32_t shaderRecSize; //submitted as shader_rec_size
uint32_t shaderRecCount; //submitted as shader_rec_count
uint8_t* uniforms; //submitted as uniforms
uint32_t uniformsSize; //in bytes, submitted as uniforms_size
uint32_t flags; //used to store clear flag etc., OR-ed into the submit flags
} CLMarker;
#define CONTROL_LIST_SIZE 4096
typedef struct ControlList
@ -19,6 +34,7 @@ typedef struct ControlList
uint8_t* buffer; //TODO size?
uint32_t numBlocks;
uint8_t* nextFreeByte; //pointer to the next available free byte
CLMarker* currMarker;
} ControlList;
void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, const ControlListAddress* address);
@ -35,9 +51,9 @@ void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address);
uint32_t divRoundUp(uint32_t n, uint32_t d);
uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset);
uint32_t clSize(ControlList* cl);
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size);
void clInit(ControlList* cl, void* buffer);
void clInsertNewCLMarker(ControlList* cl, ControlList* handlesCL, ControlList* shaderRecCL, ControlList* uniformsCL, void* imagePtr);
void clInsertData(ControlList* cl, uint32_t size, uint8_t* data);
void clInsertUniformConstant(ControlList* cl, uint32_t data);
void clInsertUniformXYScale(ControlList* cl, float data);

View File

@ -110,6 +110,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers(
break;
}
pCommandBuffers[c]->dev = device;
pCommandBuffers[c]->shaderRecCount = 0;
pCommandBuffers[c]->usageFlags = 0;
pCommandBuffers[c]->state = CMDBUF_STATE_INITIAL;
@ -124,7 +126,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers(
pCommandBuffers[c]->currentSubpass = 0;
pCommandBuffers[c]->graphicsPipeline = 0;
pCommandBuffers[c]->computePipeline = 0;
pCommandBuffers[c]->firstDraw = 1;
pCommandBuffers[c]->numDrawCallsSubmitted = 0;
pCommandBuffers[c]->vertexBufferDirty = 1;
pCommandBuffers[c]->indexBufferDirty = 1;
pCommandBuffers[c]->viewportDirty = 1;
@ -300,47 +302,113 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
{
VkCommandBuffer cmdbuf = pSubmits->pCommandBuffers[c];
cmdbuf->submitCl.bo_handles = cmdbuf->handlesCl.buffer;
cmdbuf->submitCl.bo_handle_count = clSize(&cmdbuf->handlesCl) / 4;
cmdbuf->submitCl.bin_cl = cmdbuf->binCl.buffer;
cmdbuf->submitCl.bin_cl_size = clSize(&cmdbuf->binCl);
cmdbuf->submitCl.shader_rec = cmdbuf->shaderRecCl.buffer;
cmdbuf->submitCl.shader_rec_size = clSize(&cmdbuf->shaderRecCl);
cmdbuf->submitCl.shader_rec_count = cmdbuf->shaderRecCount;
cmdbuf->submitCl.uniforms = cmdbuf->uniformsCl.buffer;
cmdbuf->submitCl.uniforms_size = clSize(&cmdbuf->uniformsCl);
//first entry is assumed to be a marker
CLMarker* marker = cmdbuf->binCl.buffer;
/**/
printf("BCL:\n");
clDump(cmdbuf->submitCl.bin_cl, cmdbuf->submitCl.bin_cl_size);
printf("BO handles: ");
for(int d = 0; d < cmdbuf->submitCl.bo_handle_count; ++d)
//submit each separate job
while(marker)
{
printf("%u ", *((uint32_t*)(cmdbuf->submitCl.bo_handles)+d));
}
printf("\nUniforms: ");
for(int d = 0; d < cmdbuf->submitCl.uniforms_size / 4; ++d)
{
printf("%u ", *((uint32_t*)(cmdbuf->submitCl.uniforms)+d));
}
printf("\nwidth height: %u, %u\n", cmdbuf->submitCl.width, cmdbuf->submitCl.height);
printf("tile min/max: %u,%u %u,%u\n", cmdbuf->submitCl.min_x_tile, cmdbuf->submitCl.min_y_tile, cmdbuf->submitCl.max_x_tile, cmdbuf->submitCl.max_y_tile);
printf("color read surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.color_read.hindex, cmdbuf->submitCl.color_read.offset, cmdbuf->submitCl.color_read.bits, cmdbuf->submitCl.color_read.flags);
printf("color write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.color_write.hindex, cmdbuf->submitCl.color_write.offset, cmdbuf->submitCl.color_write.bits, cmdbuf->submitCl.color_write.flags);
printf("zs read surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.zs_read.hindex, cmdbuf->submitCl.zs_read.offset, cmdbuf->submitCl.zs_read.bits, cmdbuf->submitCl.zs_read.flags);
printf("zs write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.zs_write.hindex, cmdbuf->submitCl.zs_write.offset, cmdbuf->submitCl.zs_write.bits, cmdbuf->submitCl.zs_write.flags);
printf("msaa color write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.msaa_color_write.hindex, cmdbuf->submitCl.msaa_color_write.offset, cmdbuf->submitCl.msaa_color_write.bits, cmdbuf->submitCl.msaa_color_write.flags);
printf("msaa zs write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.msaa_zs_write.hindex, cmdbuf->submitCl.msaa_zs_write.offset, cmdbuf->submitCl.msaa_zs_write.bits, cmdbuf->submitCl.msaa_zs_write.flags);
printf("clear color packed rgba %u %u\n", cmdbuf->submitCl.clear_color[0], cmdbuf->submitCl.clear_color[1]);
printf("clear z %u\n", cmdbuf->submitCl.clear_z);
printf("clear s %u\n", cmdbuf->submitCl.clear_s);
printf("flags %u\n", cmdbuf->submitCl.flags);
/**/
struct drm_vc4_submit_cl submitCl =
{
.color_read.hindex = ~0,
.zs_read.hindex = ~0,
.color_write.hindex = ~0,
.msaa_color_write.hindex = ~0,
.zs_write.hindex = ~0,
.msaa_zs_write.hindex = ~0,
};
ControlList* handles = marker->handles;
_image* i = marker->image;
//Insert image handle index
clFit(cmdbuf, handles, 4);
uint32_t imageIdx = clGetHandleIndex(handles, i->boundMem->bo);
//fill out submit cl fields
submitCl.color_write.hindex = imageIdx;
submitCl.color_write.offset = 0;
submitCl.color_write.flags = 0;
submitCl.color_write.bits =
VC4_SET_FIELD(getRenderTargetFormatVC4(i->format), VC4_RENDER_CONFIG_FORMAT) |
VC4_SET_FIELD(i->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);
submitCl.clear_color[0] = i->clearColor[0];
submitCl.clear_color[1] = i->clearColor[1];
submitCl.min_x_tile = 0;
submitCl.min_y_tile = 0;
uint32_t tileSizeW = 64;
uint32_t tileSizeH = 64;
if(i->samples > 1)
{
tileSizeW >>= 1;
tileSizeH >>= 1;
}
if(getFormatBpp(i->format) == 64)
{
tileSizeH >>= 1;
}
uint32_t widthInTiles = divRoundUp(i->width, tileSizeW);
uint32_t heightInTiles = divRoundUp(i->height, tileSizeH);
submitCl.max_x_tile = widthInTiles - 1;
submitCl.max_y_tile = heightInTiles - 1;
submitCl.width = i->width;
submitCl.height = i->height;
submitCl.flags |= marker->flags;//VC4_SUBMIT_CL_USE_CLEAR_COLOR;
submitCl.clear_z = 0; //TODO
submitCl.clear_s = 0;
submitCl.bo_handles = marker->handles;
submitCl.bo_handle_count = marker->handlesSize / 4;
submitCl.bin_cl = ((uint8_t*)marker) + sizeof(CLMarker);
submitCl.bin_cl_size = marker->size;
submitCl.shader_rec = marker->shaderRec;
submitCl.shader_rec_size = marker->shaderRecSize;
submitCl.shader_rec_count = marker->shaderRecCount;
submitCl.uniforms = marker->uniforms;
submitCl.uniforms_size = marker->uniformsSize;
/**/
printf("BCL:\n");
clDump(((uint8_t*)marker) + sizeof(CLMarker), marker->size);
printf("BO handles: ");
for(int d = 0; d < marker->handlesSize / 4; ++d)
{
printf("%u ", *((uint32_t*)(marker->handles)+d));
}
printf("\nUniforms: ");
for(int d = 0; d < marker->uniformsSize / 4; ++d)
{
printf("%u ", *((uint32_t*)(marker->uniforms)+d));
}
printf("\nwidth height: %u, %u\n", submitCl.width, submitCl.height);
printf("tile min/max: %u,%u %u,%u\n", submitCl.min_x_tile, submitCl.min_y_tile, submitCl.max_x_tile, submitCl.max_y_tile);
printf("color read surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.color_read.hindex, submitCl.color_read.offset, submitCl.color_read.bits, submitCl.color_read.flags);
printf("color write surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.color_write.hindex, submitCl.color_write.offset, submitCl.color_write.bits, submitCl.color_write.flags);
printf("zs read surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.zs_read.hindex, submitCl.zs_read.offset, submitCl.zs_read.bits, submitCl.zs_read.flags);
printf("zs write surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.zs_write.hindex, submitCl.zs_write.offset, submitCl.zs_write.bits, submitCl.zs_write.flags);
printf("msaa color write surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.msaa_color_write.hindex, submitCl.msaa_color_write.offset, submitCl.msaa_color_write.bits, submitCl.msaa_color_write.flags);
printf("msaa zs write surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.msaa_zs_write.hindex, submitCl.msaa_zs_write.offset, submitCl.msaa_zs_write.bits, submitCl.msaa_zs_write.flags);
printf("clear color packed rgba %u %u\n", submitCl.clear_color[0], submitCl.clear_color[1]);
printf("clear z %u\n", submitCl.clear_z);
printf("clear s %u\n", submitCl.clear_s);
printf("flags %u\n", submitCl.flags);
/**/
//submit ioctl
static uint64_t lastFinishedSeqno = 0;
vc4_cl_submit(controlFd, &cmdbuf->submitCl, &queue->lastEmitSeqno, &lastFinishedSeqno);
//submit ioctl
static uint64_t lastFinishedSeqno = 0;
vc4_cl_submit(controlFd, &submitCl, &queue->lastEmitSeqno, &lastFinishedSeqno);
//advance in linked list
marker = marker->nextMarker;
}
}
for(int c = 0; c < pSubmits->commandBufferCount; ++c)

View File

@ -732,6 +732,23 @@ uint8_t getWrapMode(VkSamplerAddressMode mode)
}
}
/**
 * Maps a Vulkan format to the VC4 render config format value.
 *
 * Returns VC4_RENDER_CONFIG_FORMAT_BGR565 for VK_FORMAT_B5G6R5_UNORM_PACK16 and
 * VC4_RENDER_CONFIG_FORMAT_RGBA8888 for VK_FORMAT_R8G8B8A8_UNORM and
 * VK_FORMAT_R16G16B16A16_SFLOAT. Any other format is reported on stdout and
 * trips an assert; if asserts are compiled out the function returns -1
 * converted to uint32_t.
 */
uint32_t getRenderTargetFormatVC4(VkFormat format)
{
	//TODO dithered BGR565
	if(format == VK_FORMAT_B5G6R5_UNORM_PACK16)
	{
		return VC4_RENDER_CONFIG_FORMAT_BGR565;
	}

	//HDR mode set in tile binning config mode, so just return a valid format
	if(format == VK_FORMAT_R16G16B16A16_SFLOAT || format == VK_FORMAT_R8G8B8A8_UNORM)
	{
		return VC4_RENDER_CONFIG_FORMAT_RGBA8888;
	}

	//anything else cannot be rendered to
	printf("unsupported rendertarget format: %i\n", format);
	assert(0);
	return -1;
}
////////////////////////////////////////////////////
////////////////////////////////////////////////////
/// just so we can return a function pointer, TODO

View File

@ -294,8 +294,6 @@ typedef struct VkCommandBuffer_T
//Recorded commands include commands to bind pipelines and descriptor sets to the command buffer, commands to modify dynamic state, commands to draw (for graphics rendering),
//commands to dispatch (for compute), commands to execute secondary command buffers (for primary command buffers only), commands to copy buffers and images, and other commands
struct drm_vc4_submit_cl submitCl;
//Rpi only supports vertex and pixel shaders
//(coordinate shaders will just use the vertex shader push constants)
//anything else will be ignored I guess
@ -318,7 +316,7 @@ typedef struct VkCommandBuffer_T
_pipeline* graphicsPipeline;
_pipeline* computePipeline;
uint32_t firstDraw; //so we can set tile binning config etc.
uint32_t numDrawCallsSubmitted;
uint32_t vertexBufferDirty;
uint32_t indexBufferDirty;
@ -475,5 +473,6 @@ void encodeTextureUniform(uint32_t* params,
uint8_t getTextureDataType(VkFormat format);
uint8_t getMinFilterType(VkFilter minFilter, VkSamplerMipmapMode mipFilter, float maxLod);
uint8_t getWrapMode(VkSamplerAddressMode mode);
uint32_t getRenderTargetFormatVC4(VkFormat format);
void clFit(VkCommandBuffer cb, ControlList* cl, uint32_t commandSize);
void clDump(void* cl, uint32_t size);

View File

@ -388,6 +388,8 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
}
}
}
cb->numDrawCallsSubmitted++;
}
VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexed(

View File

@ -96,8 +96,45 @@ VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage(
//TODO externally sync cmdbuf, cmdpool
i->needToClear = 1;
i->clearColor[0] = i->clearColor[1] = packVec4IntoABGR8(pColor->float32);
//i->needToClear = 1;
//i->clearColor[0] = i->clearColor[1] = packVec4IntoABGR8(pColor->float32);
{ //Simplest case: just submit a job to clear the image
clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length);
clInsertTileBinningModeConfiguration(&commandBuffer->binCl,
0, //double buffer in non ms mode
0, //tile allocation block size
0, //tile allocation initial block size
0, //auto initialize tile state data array
getFormatBpp(i->format) == 64, //64 bit color mode
i->samples > 1, //msaa
i->width, i->height,
0, //tile state data array address
0, //tile allocation memory size
0); //tile allocation memory address
//START_TILE_BINNING resets the statechange counters in the hardware,
//which are what is used when a primitive is binned to a tile to
//figure out what new state packets need to be written to that tile's
//command list.
clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length);
clInsertStartTileBinning(&commandBuffer->binCl);
//Reset the current compressed primitives format. This gets modified
//by VC4_PACKET_GL_INDEXED_PRIMITIVE and
//VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
//of every tile.
//clFit(commandBuffer, &commandBuffer->binCl, V3D21_PRIMITIVE_LIST_FORMAT_length);
//clInsertPrimitiveListFormat(&commandBuffer->binCl,
// 1, //16 bit
// 2); //tris
clFit(commandBuffer, &commandBuffer->binCl, V3D21_INCREMENT_SEMAPHORE_length);
clInsertIncrementSemaphore(&commandBuffer->binCl);
clFit(commandBuffer, &commandBuffer->binCl, V3D21_FLUSH_length);
clInsertFlush(&commandBuffer->binCl);
}
}
/*