mirror of
https://github.com/Yours3lf/rpi-vk-driver.git
synced 2025-02-21 18:54:18 +01:00
multithreaded command submission now works
This commit is contained in:
parent
80d90ca190
commit
cc26064e1d
@ -248,15 +248,15 @@ VKAPI_ATTR VkResult VKAPI_CALL RPIFUNC(vkBeginCommandBuffer)(
|
||||
|
||||
//When a command buffer begins recording, all state in that command buffer is undefined
|
||||
|
||||
commandBuffer->usageFlags = pBeginInfo->flags;
|
||||
commandBuffer->state = CMDBUF_STATE_RECORDING;
|
||||
|
||||
if((pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) &&
|
||||
if((commandBuffer->state == CMDBUF_STATE_INVALID || commandBuffer->state == CMDBUF_STATE_EXECUTABLE) &&
|
||||
commandBuffer->cp->resetAble)
|
||||
{
|
||||
RPIFUNC(vkResetCommandBuffer)(commandBuffer, 0);
|
||||
}
|
||||
|
||||
commandBuffer->usageFlags = pBeginInfo->flags;
|
||||
commandBuffer->state = CMDBUF_STATE_RECORDING;
|
||||
|
||||
if(pBeginInfo->pInheritanceInfo && commandBuffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
|
||||
{
|
||||
VkRenderPassBeginInfo rpbi = {0};
|
||||
@ -596,7 +596,7 @@ VKAPI_ATTR VkResult VKAPI_CALL RPIFUNC(vkQueueSubmit)(
|
||||
submitCl.shader_rec_count = marker->shaderRecCount;
|
||||
submitCl.uniforms_size = marker->uniformsSize;
|
||||
|
||||
/**/
|
||||
/**
|
||||
printf("BCL:\n");
|
||||
uint8_t* mem = malloc(marker->size);
|
||||
memcpy(mem, marker+1, marker->size);
|
||||
@ -613,10 +613,18 @@ VKAPI_ATTR VkResult VKAPI_CALL RPIFUNC(vkQueueSubmit)(
|
||||
{
|
||||
printf("%i ", *(((uint32_t*)getCPAptrFromOffset(cmdbuf->uniformsCl.CPA, marker->uniformsBufOffset + cmdbuf->uniformsCl.offset))+d));
|
||||
}
|
||||
|
||||
printf("\nShader recs: ");
|
||||
uint8_t* ptr = getCPAptrFromOffset(cmdbuf->shaderRecCl.CPA, marker->shaderRecBufOffset + cmdbuf->shaderRecCl.offset + (3 + 3) * 4);
|
||||
uint8_t* ptr = getCPAptrFromOffset(cmdbuf->shaderRecCl.CPA, marker->shaderRecBufOffset + cmdbuf->shaderRecCl.offset);
|
||||
for(int d = 0; d < marker->shaderRecCount; ++d)
|
||||
{
|
||||
printf("\nShader rec handle indices: ");
|
||||
int numIndices = 3 + 1;
|
||||
for(int d = 0; d < numIndices; ++d)
|
||||
{
|
||||
printf("%u ", *ptr);
|
||||
ptr += 4;
|
||||
}
|
||||
uint8_t flags = *ptr;
|
||||
uint8_t fragmentShaderIsSingleThreaded = flags & (1 << 0);
|
||||
uint8_t pointSizeIncludedInShadedVertexData = (flags & (1 << 1)) >> 1;
|
||||
@ -985,6 +993,8 @@ VKAPI_ATTR void VKAPI_CALL RPIFUNC(vkCmdExecuteCommands)(
|
||||
|
||||
_commandBuffer* primary = commandBuffer;
|
||||
|
||||
CLMarker* primaryMarker = getCPAptrFromOffset(primary->binCl.CPA, primary->binCl.currMarkerOffset);
|
||||
|
||||
for(uint32_t c = 0; c < commandBufferCount; ++c)
|
||||
{
|
||||
_commandBuffer* secondary = pCommandBuffers[c];
|
||||
@ -1000,28 +1010,37 @@ VKAPI_ATTR void VKAPI_CALL RPIFUNC(vkCmdExecuteCommands)(
|
||||
secondaryMarker->shaderRecRelocSize = secondary->shaderRecRelocCl.nextFreeByteOffset - (secondaryMarker->shaderRecRelocOffset + secondary->shaderRecRelocCl.offset);
|
||||
}
|
||||
|
||||
for(uint32_t d = 0; d < secondaryMarker->uniformRelocSize; ++d)
|
||||
for(uint32_t d = 0; d < secondaryMarker->uniformRelocSize / 4; ++d)
|
||||
{
|
||||
uint32_t offset = *(uint32_t*)getCPAptrFromOffset(secondary->uniformRelocCl.CPA, secondaryMarker->uniformRelocOffset + secondary->uniformRelocCl.offset);
|
||||
uint32_t offset = *(uint32_t*)getCPAptrFromOffset(secondary->uniformRelocCl.CPA, secondaryMarker->uniformRelocOffset + secondary->uniformRelocCl.offset + d * 4);
|
||||
|
||||
uint32_t* handleIdx = getCPAptrFromOffset(secondary->uniformsCl.CPA, secondary->uniformsCl.offset + offset);
|
||||
*handleIdx += primary->handlesCl.nextFreeByteOffset - primary->handlesCl.offset;
|
||||
uint32_t* handleIdx = getCPAptrFromOffset(secondary->uniformsCl.CPA, secondaryMarker->uniformsBufOffset + secondary->uniformsCl.offset + offset);
|
||||
uint32_t handle = *(uint32_t*)getCPAptrFromOffset(secondary->handlesCl.CPA, secondaryMarker->handlesBufOffset + secondary->handlesCl.offset + (*handleIdx) * 4);
|
||||
clFit(&primary->handlesCl, 4);
|
||||
uint32_t idx = clGetHandleIndex(&primary->handlesCl, primaryMarker->handlesBufOffset + primary->handlesCl.offset, primaryMarker->handlesSize, handle);
|
||||
*handleIdx = idx;
|
||||
}
|
||||
|
||||
for(uint32_t d = 0; d < secondaryMarker->gemRelocSize; ++d)
|
||||
for(uint32_t d = 0; d < secondaryMarker->gemRelocSize / 4; ++d)
|
||||
{
|
||||
uint32_t offset = *(uint32_t*)getCPAptrFromOffset(secondary->gemRelocCl.CPA, secondaryMarker->gemRelocOffset + secondary->gemRelocCl.offset);
|
||||
uint32_t offset = *(uint32_t*)getCPAptrFromOffset(secondary->gemRelocCl.CPA, secondaryMarker->gemRelocOffset + secondary->gemRelocCl.offset + d * 4);
|
||||
|
||||
uint32_t* handleIdx = getCPAptrFromOffset(secondary->binCl.CPA, secondary->binCl.offset + offset);
|
||||
*handleIdx += primary->handlesCl.nextFreeByteOffset - primary->handlesCl.offset;
|
||||
uint32_t handle = *(uint32_t*)getCPAptrFromOffset(secondary->handlesCl.CPA, secondaryMarker->handlesBufOffset + secondary->handlesCl.offset + (*handleIdx) * 4);
|
||||
clFit(&primary->handlesCl, 4);
|
||||
uint32_t idx = clGetHandleIndex(&primary->handlesCl, primaryMarker->handlesBufOffset + primary->handlesCl.offset, primaryMarker->handlesSize, handle);
|
||||
*handleIdx = idx;
|
||||
}
|
||||
|
||||
for(uint32_t d = 0; d < secondaryMarker->shaderRecRelocSize; ++d)
|
||||
for(uint32_t d = 0; d < secondaryMarker->shaderRecRelocSize / 4; ++d)
|
||||
{
|
||||
uint32_t offset = *(uint32_t*)getCPAptrFromOffset(secondary->shaderRecRelocCl.CPA, secondaryMarker->shaderRecRelocOffset + secondary->shaderRecRelocCl.offset);
|
||||
uint32_t offset = *(uint32_t*)getCPAptrFromOffset(secondary->shaderRecRelocCl.CPA, secondaryMarker->shaderRecRelocOffset + secondary->shaderRecRelocCl.offset + d * 4);
|
||||
|
||||
uint32_t* handleIdx = getCPAptrFromOffset(secondary->shaderRecCl.CPA, secondary->shaderRecCl.offset + offset);
|
||||
*handleIdx += primary->handlesCl.nextFreeByteOffset - primary->handlesCl.offset;
|
||||
uint32_t* handleIdx = getCPAptrFromOffset(secondary->shaderRecCl.CPA, secondaryMarker->shaderRecBufOffset + secondary->shaderRecCl.offset + offset);
|
||||
uint32_t handle = *(uint32_t*)getCPAptrFromOffset(secondary->handlesCl.CPA, secondaryMarker->handlesBufOffset + secondary->handlesCl.offset + (*handleIdx) * 4);
|
||||
clFit(&primary->handlesCl, 4);
|
||||
uint32_t idx = clGetHandleIndex(&primary->handlesCl, primaryMarker->handlesBufOffset + primary->handlesCl.offset, primaryMarker->handlesSize, handle);
|
||||
*handleIdx = idx;
|
||||
}
|
||||
|
||||
clFit(&primary->binCl, secondaryMarker->size);
|
||||
@ -1029,39 +1048,13 @@ VKAPI_ATTR void VKAPI_CALL RPIFUNC(vkCmdExecuteCommands)(
|
||||
|
||||
((CLMarker*)getCPAptrFromOffset(primary->binCl.CPA, primary->binCl.currMarkerOffset))->numDrawCallsSubmitted += secondaryMarker->numDrawCallsSubmitted;
|
||||
|
||||
//TODO handles/handle indices might be garbled up like this...
|
||||
clFit(&primary->handlesCl, secondaryMarker->handlesSize);
|
||||
clInsertData(&primary->handlesCl, secondaryMarker->handlesSize, getCPAptrFromOffset(secondary->handlesCl.CPA, secondaryMarker->handlesBufOffset + secondary->handlesCl.offset));
|
||||
//clFit(&primary->handlesCl, secondaryMarker->handlesSize);
|
||||
//clInsertData(&primary->handlesCl, secondaryMarker->handlesSize, getCPAptrFromOffset(secondary->handlesCl.CPA, secondaryMarker->handlesBufOffset + secondary->handlesCl.offset));
|
||||
clFit(&primary->uniformsCl, secondaryMarker->uniformsSize);
|
||||
clInsertData(&primary->uniformsCl, secondaryMarker->uniformsSize, getCPAptrFromOffset(secondary->uniformsCl.CPA, secondaryMarker->uniformsBufOffset + secondary->uniformsCl.offset));
|
||||
clFit(&primary->shaderRecCl, secondaryMarker->shaderRecSize);
|
||||
clInsertData(&primary->shaderRecCl, secondaryMarker->shaderRecSize, getCPAptrFromOffset(secondary->shaderRecCl.CPA, secondaryMarker->shaderRecBufOffset + secondary->shaderRecCl.offset));
|
||||
|
||||
|
||||
printf("\nUniforms: ");
|
||||
for(int d = 0; d < secondaryMarker->uniformsSize / 4; ++d)
|
||||
{
|
||||
printf("%i ", *(((uint32_t*)getCPAptrFromOffset(secondary->uniformsCl.CPA, secondaryMarker->uniformsBufOffset + secondary->uniformsCl.offset))+d));
|
||||
}
|
||||
|
||||
printf("\nUniforms: ");
|
||||
for(int d = 0; d < secondaryMarker->uniformsSize / 4; ++d)
|
||||
{
|
||||
printf("%i ", *(((uint32_t*)getCPAptrFromOffset(primary->uniformsCl.CPA, primary->uniformsCl.offset))+d));
|
||||
}
|
||||
|
||||
printf("\nBO handles: ");
|
||||
for(int d = 0; d < secondaryMarker->handlesSize / 4; ++d)
|
||||
{
|
||||
printf("%u ", *(((uint32_t*)getCPAptrFromOffset(secondary->handlesCl.CPA, secondaryMarker->handlesBufOffset + secondary->handlesCl.offset))+d));
|
||||
}
|
||||
|
||||
printf("\nBO handles: ");
|
||||
for(int d = 0; d < secondaryMarker->handlesSize / 4; ++d)
|
||||
{
|
||||
printf("%u ", *(((uint32_t*)getCPAptrFromOffset(primary->handlesCl.CPA, primary->handlesCl.offset))+d));
|
||||
}
|
||||
|
||||
primary->shaderRecCount += secondary->shaderRecCount;
|
||||
}
|
||||
|
||||
|
@ -235,6 +235,26 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
|
||||
assert(vertModule->numVertVPMreads == vertexAttribSize >> 2);
|
||||
assert(vertModule->numCoordVPMreads == coordAttribSize >> 2);
|
||||
|
||||
if(commandBuffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
|
||||
{
|
||||
uint32_t offset = commandBuffer->shaderRecCl.nextFreeByteOffset - commandBuffer->shaderRecCl.offset;
|
||||
|
||||
clFit(&commandBuffer->shaderRecRelocCl, 12);
|
||||
clInsertData(&commandBuffer->shaderRecRelocCl, 4, &offset);
|
||||
offset += 4;
|
||||
clInsertData(&commandBuffer->shaderRecRelocCl, 4, &offset);
|
||||
offset += 4;
|
||||
clInsertData(&commandBuffer->shaderRecRelocCl, 4, &offset);
|
||||
|
||||
|
||||
clFit(&commandBuffer->shaderRecRelocCl, 4 * attribCount);
|
||||
for(uint32_t c = 0; c < attribCount; ++c)
|
||||
{
|
||||
uint32_t offset = commandBuffer->shaderRecCl.nextFreeByteOffset - commandBuffer->shaderRecCl.offset + 12 + c * 4;
|
||||
clInsertData(&commandBuffer->shaderRecRelocCl, 4, &offset);
|
||||
}
|
||||
}
|
||||
|
||||
//number of attribs
|
||||
//3 is the number of possible shader types
|
||||
for(uint32_t c = 0; c < (3 + attribCount)*4; ++c)
|
||||
@ -269,17 +289,6 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
|
||||
coordCode //coordinate shader code address
|
||||
);
|
||||
|
||||
if(commandBuffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
|
||||
{
|
||||
uint32_t offset = commandBuffer->shaderRecCl.nextFreeByteOffset - commandBuffer->shaderRecCl.offset - 12;
|
||||
clFit(&commandBuffer->shaderRecRelocCl, 12);
|
||||
clInsertData(&commandBuffer->shaderRecRelocCl, 4, &offset);
|
||||
offset -= 16;
|
||||
clInsertData(&commandBuffer->shaderRecRelocCl, 4, &offset);
|
||||
offset -= 16;
|
||||
clInsertData(&commandBuffer->shaderRecRelocCl, 4, &offset);
|
||||
}
|
||||
|
||||
uint32_t vertexAttribOffsets[8] = {};
|
||||
uint32_t coordAttribOffsets[8] = {};
|
||||
for(uint32_t c = 1; c < 8; ++c)
|
||||
@ -341,13 +350,6 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
|
||||
vertexAttribOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].location], //vertex vpm offset
|
||||
coordAttribOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].location] //coordinate vpm offset
|
||||
);
|
||||
|
||||
if(commandBuffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
|
||||
{
|
||||
uint32_t offset = commandBuffer->shaderRecCl.nextFreeByteOffset - commandBuffer->shaderRecCl.offset - 12;
|
||||
clFit(&commandBuffer->shaderRecRelocCl, 4);
|
||||
clInsertData(&commandBuffer->shaderRecRelocCl, 4, &offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -666,6 +666,7 @@ void createCommandQueues() {
|
||||
VkCommandPoolCreateInfo poolCreateInfo = {};
|
||||
poolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
||||
poolCreateInfo.queueFamilyIndex = presentQueueFamily;
|
||||
poolCreateInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
|
||||
|
||||
if (vkCreateCommandPool(device, &poolCreateInfo, nullptr, &primaryCommandPool) != VK_SUCCESS) {
|
||||
std::cerr << "failed to create command queue for presentation queue family" << std::endl;
|
||||
@ -748,14 +749,14 @@ void threadFunc(uint32_t threadIdx, VkCommandBufferInheritanceInfo inheritanceIn
|
||||
vkCmdPushConstants(threadDataVector[threadIdx].commandBuffer, pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(pushConstants), &pushConstants);
|
||||
|
||||
uint32_t numVerticesPerDrawCall = 3 * 20;
|
||||
for(uint32_t c = threadDataVector[threadIdx].vertexOffset; c < threadDataVector[threadIdx].numVertices; c += numVerticesPerDrawCall)
|
||||
for(uint32_t c = 0; c < threadDataVector[threadIdx].numVertices; c += numVerticesPerDrawCall)
|
||||
{
|
||||
vkCmdDraw(threadDataVector[threadIdx].commandBuffer, min(numVerticesPerDrawCall, threadDataVector[threadIdx].numVertices - c), 1, c, 0);
|
||||
vkCmdDraw(threadDataVector[threadIdx].commandBuffer, min(numVerticesPerDrawCall, threadDataVector[threadIdx].numVertices - c), 1, threadDataVector[threadIdx].vertexOffset + c, 0);
|
||||
}
|
||||
|
||||
vkEndCommandBuffer(threadDataVector[threadIdx].commandBuffer);
|
||||
|
||||
std::cerr << "Recorded thread " << threadIdx << std::endl;
|
||||
//std::cerr << "Recorded thread " << threadIdx << std::endl;
|
||||
}
|
||||
|
||||
void recordCommandBuffers()
|
||||
@ -795,7 +796,7 @@ void recordCommandBuffers()
|
||||
//update secondary command buffers
|
||||
|
||||
//multi threaded mode
|
||||
/**
|
||||
/**/
|
||||
std::vector<std::thread> threads;
|
||||
threads.reserve(numThreads);
|
||||
for(uint32_t c = 0; c < numThreads; ++c)
|
||||
@ -810,8 +811,9 @@ void recordCommandBuffers()
|
||||
/**/
|
||||
|
||||
//single threaded mode for debugging
|
||||
/**/
|
||||
/**
|
||||
for(uint32_t c = 0; c < numThreads; ++c)
|
||||
//for(uint32_t c = 0; c < 1; ++c)
|
||||
{
|
||||
threadFunc(c, inheritanceInfo);
|
||||
}
|
||||
@ -820,11 +822,13 @@ void recordCommandBuffers()
|
||||
VkCommandBuffer cmdBufs[numThreads];
|
||||
|
||||
for(uint32_t c = 0; c < numThreads; ++c)
|
||||
//for(uint32_t c = 0; c < 1; ++c)
|
||||
{
|
||||
cmdBufs[c] = threadDataVector[c].commandBuffer;
|
||||
}
|
||||
|
||||
vkCmdExecuteCommands(primaryCommandBuffer, numThreads, cmdBufs);
|
||||
//vkCmdExecuteCommands(primaryCommandBuffer, 1, cmdBufs);
|
||||
|
||||
vkCmdEndRenderPass(primaryCommandBuffer);
|
||||
|
||||
@ -834,7 +838,7 @@ void recordCommandBuffers()
|
||||
}
|
||||
|
||||
|
||||
std::cout << "recorded command buffer for image " << imageIndex << std::endl;
|
||||
//std::cout << "recorded command buffer for image " << imageIndex << std::endl;
|
||||
}
|
||||
|
||||
void draw() {
|
||||
@ -845,7 +849,7 @@ void draw() {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
std::cout << "acquired image" << std::endl;
|
||||
//std::cout << "acquired image" << std::endl;
|
||||
|
||||
recordCommandBuffers();
|
||||
|
||||
@ -867,7 +871,7 @@ void draw() {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
std::cout << "submitted draw command buffer" << std::endl;
|
||||
//std::cout << "submitted draw command buffer" << std::endl;
|
||||
|
||||
// Present drawn image
|
||||
// Note: semaphore here is not strictly necessary, because commands are processed in submission order within a single queue
|
||||
@ -887,7 +891,7 @@ void draw() {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
std::cout << "submitted presentation command buffer" << std::endl;
|
||||
//std::cout << "submitted presentation command buffer" << std::endl;
|
||||
}
|
||||
|
||||
void CreateRenderPass()
|
||||
@ -1333,8 +1337,8 @@ void CreateVertexBuffer()
|
||||
float w = 2.0;
|
||||
float h = 2.0;
|
||||
|
||||
float stepH = 90*6.0*h/1080.0;
|
||||
float stepW = 90*8.0*w/1920.0;
|
||||
float stepH = 2*6.0*h/1080.0;
|
||||
float stepW = 2*8.0*w/1920.0;
|
||||
|
||||
vertices.reserve(3 * 2 * 960 * 540);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user