mirror of
https://github.com/Yours3lf/rpi-vk-driver.git
synced 2025-02-19 16:54:18 +01:00
started to rework control list submission so that multiple drawcalls work
This commit is contained in:
parent
1b7fddc5a0
commit
1e5a5c965a
@ -13,17 +13,12 @@ uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset)
|
||||
return (d << offset) & (~(~0 << bits) << offset);
|
||||
}
|
||||
|
||||
uint32_t clSize(ControlList* cl)
|
||||
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size)
|
||||
{
|
||||
assert(cl);
|
||||
assert(cl->buffer);
|
||||
assert(cl->nextFreeByte);
|
||||
return cl->nextFreeByte - cl->buffer;
|
||||
}
|
||||
|
||||
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size)
|
||||
{
|
||||
uint32_t currSize = clSize(cl);
|
||||
uint32_t currSize = cl->nextFreeByte - cl->buffer;;
|
||||
if(currSize + size < CONTROL_LIST_SIZE)
|
||||
{
|
||||
return 1; //fits!
|
||||
@ -41,6 +36,33 @@ void clInit(ControlList* cl, void* buffer)
|
||||
cl->buffer = buffer;
|
||||
cl->numBlocks = 1;
|
||||
cl->nextFreeByte = &cl->buffer[0];
|
||||
cl->currMarker = 0;
|
||||
}
|
||||
|
||||
/*
 * Writes a new CLMarker header at the current write position of `cl` and
 * chains it behind the marker that was inserted before it (if any).
 *
 * cl          - control list that receives the marker; its nextFreeByte is
 *               advanced by sizeof(CLMarker) and its currMarker is updated to
 *               point at the freshly written marker
 * handlesCL   - handles list recorded in the first marker
 * shaderRecCL - shader record list recorded in the first marker
 * uniformsCL  - uniforms list recorded in the first marker
 * imagePtr    - stored in the marker's `image` field (render target)
 *
 * For every marker after the first one, the handles / shaderRec / uniforms
 * pointers start where the previous marker's range ended
 * (previous pointer + previous size).
 *
 * The caller is responsible for making sure sizeof(CLMarker) bytes are
 * available at cl->nextFreeByte; no capacity check is done here.
 */
void clInsertNewCLMarker(ControlList* cl, ControlList* handlesCL, ControlList* shaderRecCL, ControlList* uniformsCL, void* imagePtr)
{
	//to be inserted when you'd insert tile binning mode config
	assert(cl);
	assert(cl->nextFreeByte);
	assert(handlesCL);
	assert(shaderRecCL);
	assert(uniformsCL);
	assert(imagePtr);

	CLMarker* prevMarker = cl->currMarker;
	CLMarker* newMarker = (CLMarker*)cl->nextFreeByte;

	CLMarker marker;
	marker.nextMarker = 0; //new marker is always the tail of the list
	marker.size = 0;
	marker.image = imagePtr;
	marker.handlesSize = 0;
	marker.shaderRecSize = 0;
	marker.shaderRecCount = 0;
	marker.uniformsSize = 0;
	marker.flags = 0;

	if(prevMarker)
	{
		//continue right after the data owned by the previous marker
		marker.handles = prevMarker->handles + prevMarker->handlesSize;
		marker.shaderRec = prevMarker->shaderRec + prevMarker->shaderRecSize;
		marker.uniforms = prevMarker->uniforms + prevMarker->uniformsSize;
	}
	else
	{
		//NOTE(review): the first marker stores the ControlList pointers themselves
		//(not their ->buffer), exactly as before; confirm which one the consumers expect
		marker.handles = (uint8_t*)handlesCL;
		marker.shaderRec = (uint8_t*)shaderRecCL;
		marker.uniforms = (uint8_t*)uniformsCL;
	}

	*newMarker = marker;

	//link the marker into the list, otherwise a walk over nextMarker
	//(as done at submit time) never gets past the first marker
	if(prevMarker)
	{
		prevMarker->nextMarker = newMarker;
	}
	cl->currMarker = newMarker;

	cl->nextFreeByte += sizeof(CLMarker);
}
|
||||
|
||||
void clInsertData(ControlList* cl, uint32_t size, uint8_t* data)
|
||||
|
@ -12,6 +12,21 @@ typedef struct ControlListAddress
|
||||
uint32_t offset; //offset within buffer object
|
||||
} ControlListAddress;
|
||||
|
||||
//Header describing one job inside a control list. Markers form a singly
//linked list through nextMarker; vkQueueSubmit walks that list and builds
//one submit from the fields of each marker.
typedef struct CLMarker
|
||||
{
|
||||
struct CLMarker* nextMarker; //next marker in the list, 0 for the last one
|
||||
uint32_t size; //in bytes; submitted as the size of the binning commands that follow this marker
|
||||
void* image; //_image* to render to
|
||||
uint8_t* handles; //start of this job's buffer object handles
|
||||
uint32_t handlesSize; //in bytes; handle count is derived as handlesSize / 4
|
||||
uint8_t* shaderRec; //start of this job's shader records
|
||||
uint32_t shaderRecSize; //submitted as the shader record size
|
||||
uint32_t shaderRecCount; //submitted as the shader record count
|
||||
uint8_t* uniforms; //start of this job's uniforms
|
||||
uint32_t uniformsSize; //in bytes; dumped as uniformsSize / 4 32-bit values
|
||||
uint32_t flags; //used to store clear flag etc.; OR-ed into the submit flags
|
||||
} CLMarker;
|
||||
|
||||
#define CONTROL_LIST_SIZE 4096
|
||||
|
||||
typedef struct ControlList
|
||||
@ -19,6 +34,7 @@ typedef struct ControlList
|
||||
uint8_t* buffer; //TODO size?
|
||||
uint32_t numBlocks;
|
||||
uint8_t* nextFreeByte; //pointer to the next available free byte
|
||||
CLMarker* currMarker;
|
||||
} ControlList;
|
||||
|
||||
void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, const ControlListAddress* address);
|
||||
@ -35,9 +51,9 @@ void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address);
|
||||
|
||||
uint32_t divRoundUp(uint32_t n, uint32_t d);
|
||||
uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset);
|
||||
uint32_t clSize(ControlList* cl);
|
||||
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size);
|
||||
void clInit(ControlList* cl, void* buffer);
|
||||
void clInsertNewCLMarker(ControlList* cl, ControlList* handlesCL, ControlList* shaderRecCL, ControlList* uniformsCL, void* imagePtr);
|
||||
void clInsertData(ControlList* cl, uint32_t size, uint8_t* data);
|
||||
void clInsertUniformConstant(ControlList* cl, uint32_t data);
|
||||
void clInsertUniformXYScale(ControlList* cl, float data);
|
||||
|
144
driver/command.c
144
driver/command.c
@ -110,6 +110,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers(
|
||||
break;
|
||||
}
|
||||
|
||||
pCommandBuffers[c]->dev = device;
|
||||
|
||||
pCommandBuffers[c]->shaderRecCount = 0;
|
||||
pCommandBuffers[c]->usageFlags = 0;
|
||||
pCommandBuffers[c]->state = CMDBUF_STATE_INITIAL;
|
||||
@ -124,7 +126,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers(
|
||||
pCommandBuffers[c]->currentSubpass = 0;
|
||||
pCommandBuffers[c]->graphicsPipeline = 0;
|
||||
pCommandBuffers[c]->computePipeline = 0;
|
||||
pCommandBuffers[c]->firstDraw = 1;
|
||||
pCommandBuffers[c]->numDrawCallsSubmitted = 0;
|
||||
pCommandBuffers[c]->vertexBufferDirty = 1;
|
||||
pCommandBuffers[c]->indexBufferDirty = 1;
|
||||
pCommandBuffers[c]->viewportDirty = 1;
|
||||
@ -300,47 +302,113 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
|
||||
{
|
||||
VkCommandBuffer cmdbuf = pSubmits->pCommandBuffers[c];
|
||||
|
||||
cmdbuf->submitCl.bo_handles = cmdbuf->handlesCl.buffer;
|
||||
cmdbuf->submitCl.bo_handle_count = clSize(&cmdbuf->handlesCl) / 4;
|
||||
cmdbuf->submitCl.bin_cl = cmdbuf->binCl.buffer;
|
||||
cmdbuf->submitCl.bin_cl_size = clSize(&cmdbuf->binCl);
|
||||
cmdbuf->submitCl.shader_rec = cmdbuf->shaderRecCl.buffer;
|
||||
cmdbuf->submitCl.shader_rec_size = clSize(&cmdbuf->shaderRecCl);
|
||||
cmdbuf->submitCl.shader_rec_count = cmdbuf->shaderRecCount;
|
||||
cmdbuf->submitCl.uniforms = cmdbuf->uniformsCl.buffer;
|
||||
cmdbuf->submitCl.uniforms_size = clSize(&cmdbuf->uniformsCl);
|
||||
//first entry is assumed to be a marker
|
||||
CLMarker* marker = cmdbuf->binCl.buffer;
|
||||
|
||||
/**/
|
||||
printf("BCL:\n");
|
||||
clDump(cmdbuf->submitCl.bin_cl, cmdbuf->submitCl.bin_cl_size);
|
||||
printf("BO handles: ");
|
||||
for(int d = 0; d < cmdbuf->submitCl.bo_handle_count; ++d)
|
||||
//submit each separate job
|
||||
while(marker)
|
||||
{
|
||||
printf("%u ", *((uint32_t*)(cmdbuf->submitCl.bo_handles)+d));
|
||||
}
|
||||
printf("\nUniforms: ");
|
||||
for(int d = 0; d < cmdbuf->submitCl.uniforms_size / 4; ++d)
|
||||
{
|
||||
printf("%u ", *((uint32_t*)(cmdbuf->submitCl.uniforms)+d));
|
||||
}
|
||||
printf("\nwidth height: %u, %u\n", cmdbuf->submitCl.width, cmdbuf->submitCl.height);
|
||||
printf("tile min/max: %u,%u %u,%u\n", cmdbuf->submitCl.min_x_tile, cmdbuf->submitCl.min_y_tile, cmdbuf->submitCl.max_x_tile, cmdbuf->submitCl.max_y_tile);
|
||||
printf("color read surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.color_read.hindex, cmdbuf->submitCl.color_read.offset, cmdbuf->submitCl.color_read.bits, cmdbuf->submitCl.color_read.flags);
|
||||
printf("color write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.color_write.hindex, cmdbuf->submitCl.color_write.offset, cmdbuf->submitCl.color_write.bits, cmdbuf->submitCl.color_write.flags);
|
||||
printf("zs read surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.zs_read.hindex, cmdbuf->submitCl.zs_read.offset, cmdbuf->submitCl.zs_read.bits, cmdbuf->submitCl.zs_read.flags);
|
||||
printf("zs write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.zs_write.hindex, cmdbuf->submitCl.zs_write.offset, cmdbuf->submitCl.zs_write.bits, cmdbuf->submitCl.zs_write.flags);
|
||||
printf("msaa color write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.msaa_color_write.hindex, cmdbuf->submitCl.msaa_color_write.offset, cmdbuf->submitCl.msaa_color_write.bits, cmdbuf->submitCl.msaa_color_write.flags);
|
||||
printf("msaa zs write surf: hindex, offset, bits, flags %u %u %u %u\n", cmdbuf->submitCl.msaa_zs_write.hindex, cmdbuf->submitCl.msaa_zs_write.offset, cmdbuf->submitCl.msaa_zs_write.bits, cmdbuf->submitCl.msaa_zs_write.flags);
|
||||
printf("clear color packed rgba %u %u\n", cmdbuf->submitCl.clear_color[0], cmdbuf->submitCl.clear_color[1]);
|
||||
printf("clear z %u\n", cmdbuf->submitCl.clear_z);
|
||||
printf("clear s %u\n", cmdbuf->submitCl.clear_s);
|
||||
printf("flags %u\n", cmdbuf->submitCl.flags);
|
||||
/**/
|
||||
struct drm_vc4_submit_cl submitCl =
|
||||
{
|
||||
.color_read.hindex = ~0,
|
||||
.zs_read.hindex = ~0,
|
||||
.color_write.hindex = ~0,
|
||||
.msaa_color_write.hindex = ~0,
|
||||
.zs_write.hindex = ~0,
|
||||
.msaa_zs_write.hindex = ~0,
|
||||
};
|
||||
|
||||
ControlList* handles = marker->handles;
|
||||
_image* i = marker->image;
|
||||
|
||||
//Insert image handle index
|
||||
clFit(cmdbuf, handles, 4);
|
||||
uint32_t imageIdx = clGetHandleIndex(handles, i->boundMem->bo);
|
||||
|
||||
//fill out submit cl fields
|
||||
submitCl.color_write.hindex = imageIdx;
|
||||
submitCl.color_write.offset = 0;
|
||||
submitCl.color_write.flags = 0;
|
||||
submitCl.color_write.bits =
|
||||
VC4_SET_FIELD(getRenderTargetFormatVC4(i->format), VC4_RENDER_CONFIG_FORMAT) |
|
||||
VC4_SET_FIELD(i->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);
|
||||
|
||||
submitCl.clear_color[0] = i->clearColor[0];
|
||||
submitCl.clear_color[1] = i->clearColor[1];
|
||||
|
||||
submitCl.min_x_tile = 0;
|
||||
submitCl.min_y_tile = 0;
|
||||
|
||||
uint32_t tileSizeW = 64;
|
||||
uint32_t tileSizeH = 64;
|
||||
|
||||
if(i->samples > 1)
|
||||
{
|
||||
tileSizeW >>= 1;
|
||||
tileSizeH >>= 1;
|
||||
}
|
||||
|
||||
if(getFormatBpp(i->format) == 64)
|
||||
{
|
||||
tileSizeH >>= 1;
|
||||
}
|
||||
|
||||
uint32_t widthInTiles = divRoundUp(i->width, tileSizeW);
|
||||
uint32_t heightInTiles = divRoundUp(i->height, tileSizeH);
|
||||
|
||||
submitCl.max_x_tile = widthInTiles - 1;
|
||||
submitCl.max_y_tile = heightInTiles - 1;
|
||||
submitCl.width = i->width;
|
||||
submitCl.height = i->height;
|
||||
submitCl.flags |= marker->flags;//VC4_SUBMIT_CL_USE_CLEAR_COLOR;
|
||||
submitCl.clear_z = 0; //TODO
|
||||
submitCl.clear_s = 0;
|
||||
|
||||
submitCl.bo_handles = marker->handles;
|
||||
submitCl.bo_handle_count = marker->handlesSize / 4;
|
||||
submitCl.bin_cl = ((uint8_t*)marker) + sizeof(CLMarker);
|
||||
submitCl.bin_cl_size = marker->size;
|
||||
submitCl.shader_rec = marker->shaderRec;
|
||||
submitCl.shader_rec_size = marker->shaderRecSize;
|
||||
submitCl.shader_rec_count = marker->shaderRecCount;
|
||||
submitCl.uniforms = marker->uniforms;
|
||||
submitCl.uniforms_size = marker->uniformsSize;
|
||||
|
||||
/**/
|
||||
printf("BCL:\n");
|
||||
clDump(((uint8_t*)marker) + sizeof(CLMarker), marker->size);
|
||||
printf("BO handles: ");
|
||||
for(int d = 0; d < marker->handlesSize / 4; ++d)
|
||||
{
|
||||
printf("%u ", *((uint32_t*)(marker->handles)+d));
|
||||
}
|
||||
printf("\nUniforms: ");
|
||||
for(int d = 0; d < marker->uniformsSize / 4; ++d)
|
||||
{
|
||||
printf("%u ", *((uint32_t*)(marker->uniforms)+d));
|
||||
}
|
||||
printf("\nwidth height: %u, %u\n", submitCl.width, submitCl.height);
|
||||
printf("tile min/max: %u,%u %u,%u\n", submitCl.min_x_tile, submitCl.min_y_tile, submitCl.max_x_tile, submitCl.max_y_tile);
|
||||
printf("color read surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.color_read.hindex, submitCl.color_read.offset, submitCl.color_read.bits, submitCl.color_read.flags);
|
||||
printf("color write surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.color_write.hindex, submitCl.color_write.offset, submitCl.color_write.bits, submitCl.color_write.flags);
|
||||
printf("zs read surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.zs_read.hindex, submitCl.zs_read.offset, submitCl.zs_read.bits, submitCl.zs_read.flags);
|
||||
printf("zs write surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.zs_write.hindex, submitCl.zs_write.offset, submitCl.zs_write.bits, submitCl.zs_write.flags);
|
||||
printf("msaa color write surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.msaa_color_write.hindex, submitCl.msaa_color_write.offset, submitCl.msaa_color_write.bits, submitCl.msaa_color_write.flags);
|
||||
printf("msaa zs write surf: hindex, offset, bits, flags %u %u %u %u\n", submitCl.msaa_zs_write.hindex, submitCl.msaa_zs_write.offset, submitCl.msaa_zs_write.bits, submitCl.msaa_zs_write.flags);
|
||||
printf("clear color packed rgba %u %u\n", submitCl.clear_color[0], submitCl.clear_color[1]);
|
||||
printf("clear z %u\n", submitCl.clear_z);
|
||||
printf("clear s %u\n", submitCl.clear_s);
|
||||
printf("flags %u\n", submitCl.flags);
|
||||
/**/
|
||||
|
||||
|
||||
//submit ioctl
|
||||
static uint64_t lastFinishedSeqno = 0;
|
||||
vc4_cl_submit(controlFd, &cmdbuf->submitCl, &queue->lastEmitSeqno, &lastFinishedSeqno);
|
||||
//submit ioctl
|
||||
static uint64_t lastFinishedSeqno = 0;
|
||||
vc4_cl_submit(controlFd, &submitCl, &queue->lastEmitSeqno, &lastFinishedSeqno);
|
||||
|
||||
//advance in linked list
|
||||
marker = marker->nextMarker;
|
||||
}
|
||||
}
|
||||
|
||||
for(int c = 0; c < pSubmits->commandBufferCount; ++c)
|
||||
|
@ -732,6 +732,23 @@ uint8_t getWrapMode(VkSamplerAddressMode mode)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Translates a Vulkan image format into the VC4 render config format value.
 *
 * format - Vulkan format of the render target
 *
 * Returns VC4_RENDER_CONFIG_FORMAT_RGBA8888 for R8G8B8A8_UNORM and for
 * R16G16B16A16_SFLOAT, VC4_RENDER_CONFIG_FORMAT_BGR565 for
 * B5G6R5_UNORM_PACK16. Any other format is reported on stdout, trips an
 * assert, and yields -1 (converted to uint32_t) if asserts are disabled.
 */
uint32_t getRenderTargetFormatVC4(VkFormat format)
{
	//TODO dithered BGR565

	//HDR mode set in tile binning config mode, so just return a valid format
	if(format == VK_FORMAT_R16G16B16A16_SFLOAT || format == VK_FORMAT_R8G8B8A8_UNORM)
	{
		return VC4_RENDER_CONFIG_FORMAT_RGBA8888;
	}

	if(format == VK_FORMAT_B5G6R5_UNORM_PACK16)
	{
		return VC4_RENDER_CONFIG_FORMAT_BGR565;
	}

	//everything else cannot be used as a render target
	printf("unsupported rendertarget format: %i\n", format);
	assert(0);
	return -1;
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////
|
||||
/// just so we can return a function pointer, TODO
|
||||
|
@ -294,8 +294,6 @@ typedef struct VkCommandBuffer_T
|
||||
//Recorded commands include commands to bind pipelines and descriptor sets to the command buffer, commands to modify dynamic state, commands to draw (for graphics rendering),
|
||||
//commands to dispatch (for compute), commands to execute secondary command buffers (for primary command buffers only), commands to copy buffers and images, and other commands
|
||||
|
||||
struct drm_vc4_submit_cl submitCl;
|
||||
|
||||
//Rpi only supports vertex and pixel shaders
|
||||
//(coordinate shaders will just use the vertex shader push constants)
|
||||
//anything else will be ignored I guess
|
||||
@ -318,7 +316,7 @@ typedef struct VkCommandBuffer_T
|
||||
_pipeline* graphicsPipeline;
|
||||
_pipeline* computePipeline;
|
||||
|
||||
uint32_t firstDraw; //so we can set tile binning config etc.
|
||||
uint32_t numDrawCallsSubmitted;
|
||||
|
||||
uint32_t vertexBufferDirty;
|
||||
uint32_t indexBufferDirty;
|
||||
@ -475,5 +473,6 @@ void encodeTextureUniform(uint32_t* params,
|
||||
uint8_t getTextureDataType(VkFormat format);
|
||||
uint8_t getMinFilterType(VkFilter minFilter, VkSamplerMipmapMode mipFilter, float maxLod);
|
||||
uint8_t getWrapMode(VkSamplerAddressMode mode);
|
||||
uint32_t getRenderTargetFormatVC4(VkFormat format);
|
||||
void clFit(VkCommandBuffer cb, ControlList* cl, uint32_t commandSize);
|
||||
void clDump(void* cl, uint32_t size);
|
||||
|
@ -388,6 +388,8 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cb->numDrawCallsSubmitted++;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexed(
|
||||
|
@ -96,8 +96,45 @@ VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage(
|
||||
|
||||
//TODO externally sync cmdbuf, cmdpool
|
||||
|
||||
i->needToClear = 1;
|
||||
i->clearColor[0] = i->clearColor[1] = packVec4IntoABGR8(pColor->float32);
|
||||
//i->needToClear = 1;
|
||||
//i->clearColor[0] = i->clearColor[1] = packVec4IntoABGR8(pColor->float32);
|
||||
|
||||
|
||||
{ //Simplest case: just submit a job to clear the image
|
||||
clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length);
|
||||
clInsertTileBinningModeConfiguration(&commandBuffer->binCl,
|
||||
0, //double buffer in non ms mode
|
||||
0, //tile allocation block size
|
||||
0, //tile allocation initial block size
|
||||
0, //auto initialize tile state data array
|
||||
getFormatBpp(i->format) == 64, //64 bit color mode
|
||||
i->samples > 1, //msaa
|
||||
i->width, i->height,
|
||||
0, //tile state data array address
|
||||
0, //tile allocation memory size
|
||||
0); //tile allocation memory address
|
||||
|
||||
//START_TILE_BINNING resets the statechange counters in the hardware,
|
||||
//which are what is used when a primitive is binned to a tile to
|
||||
//figure out what new state packets need to be written to that tile's
|
||||
//command list.
|
||||
clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length);
|
||||
clInsertStartTileBinning(&commandBuffer->binCl);
|
||||
|
||||
//Reset the current compressed primitives format. This gets modified
|
||||
//by VC4_PACKET_GL_INDEXED_PRIMITIVE and
|
||||
//VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
|
||||
//of every tile.
|
||||
//clFit(commandBuffer, &commandBuffer->binCl, V3D21_PRIMITIVE_LIST_FORMAT_length);
|
||||
//clInsertPrimitiveListFormat(&commandBuffer->binCl,
|
||||
// 1, //16 bit
|
||||
// 2); //tris
|
||||
|
||||
clFit(commandBuffer, &commandBuffer->binCl, V3D21_INCREMENT_SEMAPHORE_length);
|
||||
clInsertIncrementSemaphore(&commandBuffer->binCl);
|
||||
clFit(commandBuffer, &commandBuffer->binCl, V3D21_FLUSH_length);
|
||||
clInsertFlush(&commandBuffer->binCl);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user