diff --git a/driver/ControlListUtil.c b/driver/ControlListUtil.c index 1dcf355..fce2c83 100644 --- a/driver/ControlListUtil.c +++ b/driver/ControlListUtil.c @@ -49,7 +49,10 @@ void clInsertNewCLMarker(ControlList* cl, void* writeDepthStencilImagePtr, void* readDepthStencilImagePtr, void* writeMSAAimagePtr, - void* writeMSAAdepthStencilImagePtr) + void* writeMSAAdepthStencilImagePtr, + uint32_t performResolve, + uint32_t readMSAAimage, + uint32_t readMSAAdepthStencilImage) { //to be inserted when you'd insert tile binning mode config assert(cl); @@ -66,6 +69,9 @@ void clInsertNewCLMarker(ControlList* cl, marker.readDepthStencilImage = readDepthStencilImagePtr; marker.writeMSAAimage = writeMSAAimagePtr; marker.writeMSAAdepthStencilImage = writeMSAAdepthStencilImagePtr; + marker.performResolve = performResolve; + marker.readMSAAimage = readMSAAimage; + marker.readMSAAdepthStencilImage = readMSAAdepthStencilImage; marker.handlesSize = 0; marker.shaderRecSize = 0; marker.uniformsSize = 0; diff --git a/driver/ControlListUtil.h b/driver/ControlListUtil.h index ff6f2b8..067b984 100644 --- a/driver/ControlListUtil.h +++ b/driver/ControlListUtil.h @@ -24,6 +24,9 @@ typedef struct CLMarker void* writeMSAAimage; void* writeMSAAdepthStencilImage; uint32_t flags; //used to store clear flag etc. + uint32_t performResolve; + uint32_t readMSAAimage; + uint32_t readMSAAdepthStencilImage; //pointers that point to where all the other CL data is //plus sizes @@ -72,7 +75,10 @@ void clInsertNewCLMarker(ControlList* cl, void* writeDepthStencilImagePtr, void* readDepthStencilImagePtr, void* writeMSAAimagePtr, - void* writeMSAAdepthStencilImagePtr); + void* writeMSAAdepthStencilImagePtr, + uint32_t performResolve, + uint32_t readMSAAimage, + uint32_t readMSAAdepthStencilImage); void clCloseCurrentMarker(ControlList* cl, ControlList* handlesCL, ControlList* shaderRecCL, uint32_t shaderRecCount, ControlList* uniformsCL); void clInsertData(ControlList* cl, uint32_t size, uint8_t* data); void clInsertUniformConstant(ControlList* cl, uint32_t data); diff --git a/driver/command.c b/driver/command.c index f402eea..96276c7 100644 --- a/driver/command.c +++ b/driver/command.c @@ -123,9 +123,6 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkAllocateCommandBuffers( clInit(&pCommandBuffers[c]->shaderRecCl, consecutivePoolAllocate(&cp->cpa, 1)); clInit(&pCommandBuffers[c]->uniformsCl, consecutivePoolAllocate(&cp->cpa, 1)); - pCommandBuffers[c]->renderpass = 0; - pCommandBuffers[c]->fbo = 0; - pCommandBuffers[c]->currentSubpass = 0; pCommandBuffers[c]->graphicsPipeline = 0; pCommandBuffers[c]->computePipeline = 0; pCommandBuffers[c]->numDrawCallsSubmitted = 0; @@ -318,9 +315,11 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( _image* readDepthStencilImage = marker->readDepthStencilImage; _image* writeMSAAimage = marker->writeMSAAimage; _image* writeMSAAdepthStencilImage = marker->writeMSAAdepthStencilImage; + uint32_t performResolve = marker->performResolve; + uint32_t readMSAAimage = marker->readMSAAimage; + uint32_t readMSAAdepthStencilImage = marker->readMSAAdepthStencilImage; //This should not result in an insertion! - uint32_t writeImageIdx = writeImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeImage->boundMem->bo) : 0; uint32_t readImageIdx = readImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, readImage->boundMem->bo) : 0; uint32_t writeDepthStencilImageIdx = writeDepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeDepthStencilImage->boundMem->bo) : 0; @@ -328,12 +327,29 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( uint32_t writeMSAAimageIdx = writeMSAAimage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeMSAAimage->boundMem->bo) : 0; uint32_t writeMSAAdepthStencilImageIdx = writeMSAAdepthStencilImage ? clGetHandleIndex(&cmdbuf->handlesCl, marker->handlesBuf, marker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo) : 0; - uint32_t msaa = writeMSAAimageIdx != 0; +// fprintf(stderr, "writeImage: %u\n", writeImage); +// fprintf(stderr, "readImage: %u\n", readImage); +// fprintf(stderr, "writeDepthStencilImage: %u\n", writeDepthStencilImage); +// fprintf(stderr, "readDepthStencilImage: %u\n", readDepthStencilImage); +// fprintf(stderr, "writeMSAAimage: %u\n", writeMSAAimage); +// fprintf(stderr, "writeMSAAdepthStencilImage: %u\n", writeMSAAdepthStencilImage); +// fprintf(stderr, "performResolve: %u\n", performResolve); +// fprintf(stderr, "readMSAAimage: %u\n", readMSAAimage); +// fprintf(stderr, "readMSAAdepthStencilImage: %u\n", readMSAAdepthStencilImage); +// fprintf(stderr, "writeImageIdx: %u\n", writeImageIdx); +// fprintf(stderr, "readImageIdx: %u\n", readImageIdx); +// fprintf(stderr, "writeDepthStencilImageIdx: %u\n", writeDepthStencilImageIdx); +// fprintf(stderr, "readDepthStencilImageIdx: %u\n", readDepthStencilImageIdx); +// fprintf(stderr, "writeMSAAimageIdx: %u\n", writeMSAAimageIdx); +// fprintf(stderr, "writeMSAAdepthStencilImageIdx: %u\n", writeMSAAdepthStencilImageIdx); - //TODO handle don't care store bit + submitCl.clear_color[0] = 0; + submitCl.clear_color[1] = 0; + submitCl.clear_z = 0; + submitCl.clear_s = 0; //fill out submit cl fields - if(writeImageIdx) + if(writeImage) { submitCl.color_write.hindex = writeImageIdx; submitCl.color_write.offset = 0; @@ -342,25 +358,36 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( VC4_SET_FIELD(getRenderTargetFormatVC4(writeImage->format), VC4_RENDER_CONFIG_FORMAT) | VC4_SET_FIELD(writeImage->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); - //TODO which image should the clear color come from? + if(performResolve) + { + submitCl.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X | VC4_RENDER_CONFIG_DECIMATE_MODE_4X; + } + submitCl.clear_color[0] = writeImage->clearColor[0]; submitCl.clear_color[1] = writeImage->clearColor[1]; } - else + + if(writeMSAAimage) { - submitCl.clear_color[0] = 0; - submitCl.clear_color[1] = 0; + submitCl.msaa_color_write.hindex = writeMSAAimageIdx; + submitCl.msaa_color_write.offset = 0; + submitCl.msaa_color_write.flags = 0; + submitCl.msaa_color_write.bits = VC4_RENDER_CONFIG_MS_MODE_4X; + + submitCl.clear_color[0] = writeMSAAimage->clearColor[0]; + submitCl.clear_color[1] = writeMSAAimage->clearColor[1]; } - if(readImageIdx) + if(readImage) { submitCl.color_read.hindex = readImageIdx; submitCl.color_read.offset = 0; - submitCl.color_read.flags = readImage->samples > 1 ? VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES : 0; - submitCl.color_read.bits = 0; //TODO + submitCl.color_read.flags = readMSAAimage ? VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES : 0; + submitCl.color_read.bits = VC4_SET_FIELD(getRenderTargetFormatVC4(readImage->format), VC4_RENDER_CONFIG_FORMAT) | + VC4_SET_FIELD(readImage->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); } - if(writeDepthStencilImageIdx) + if(writeDepthStencilImage) { submitCl.zs_write.hindex = writeDepthStencilImageIdx; submitCl.zs_write.offset = 0; @@ -371,31 +398,31 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( submitCl.clear_z = writeDepthStencilImage->clearColor[0]; //0...1 -> 0...0xffffff submitCl.clear_s = writeDepthStencilImage->clearColor[1]; //0...0xff } - else + + if(writeMSAAdepthStencilImage) { - submitCl.clear_z = 0; - submitCl.clear_s = 0; + submitCl.msaa_zs_write.hindex = writeMSAAdepthStencilImageIdx; + submitCl.msaa_zs_write.offset = 0; + submitCl.msaa_zs_write.flags = 0; + submitCl.msaa_zs_write.bits = VC4_RENDER_CONFIG_MS_MODE_4X; + + submitCl.clear_z = writeMSAAdepthStencilImage->clearColor[0]; //0...1 -> 0...0xffffff + submitCl.clear_s = writeMSAAdepthStencilImage->clearColor[1]; //0...0xff } - if(readDepthStencilImageIdx) + if(readDepthStencilImage) { submitCl.zs_read.hindex = readDepthStencilImageIdx; submitCl.zs_read.offset = 0; - submitCl.zs_read.flags = 0; - submitCl.zs_read.bits = 0; //TODO + submitCl.zs_read.flags = readMSAAdepthStencilImage ? VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES : 0; //TODO is this valid? + submitCl.zs_read.bits = VC4_SET_FIELD(getRenderTargetFormatVC4(readDepthStencilImage->format), VC4_RENDER_CONFIG_FORMAT) | + VC4_SET_FIELD(readDepthStencilImage->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); } - //TODO handle this properly - if(msaa) - { - // This bit controls how many pixels the general - // (i.e. subsampled) loads/stores are iterating over - // (multisample loads replicate out to the other samples). - submitCl.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X; - // Controls whether color_write's - // VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation - submitCl.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X; - } +// fprintf(stderr, "submitCl.clear_color[0]: %u\n", submitCl.clear_color[0]); +// fprintf(stderr, "submitCl.clear_color[1]: %u\n", submitCl.clear_color[1]); +// fprintf(stderr, "submitCl.clear_z: %u\n", submitCl.clear_z); +// fprintf(stderr, "submitCl.clear_s: %u\n", submitCl.clear_s); submitCl.min_x_tile = 0; submitCl.min_y_tile = 0; @@ -403,33 +430,41 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( uint32_t tileSizeW = 64; uint32_t tileSizeH = 64; - if(msaa) + uint32_t widthInTiles = 0, heightInTiles = 0; + uint32_t width = 0, height = 0, bpp = 0; + + if(writeImage) + { + width = writeImage->width; + height = writeImage->height; + bpp = getFormatBpp(writeImage->format); + } + else if(writeMSAAimage) + { + width = writeMSAAimage->width; + height = writeMSAAimage->height; + bpp = getFormatBpp(writeMSAAimage->format); + } + else if(writeDepthStencilImage) + { + width = writeDepthStencilImage->width; + height = writeDepthStencilImage->height; + } + else if(writeMSAAdepthStencilImage) + { + width = writeMSAAdepthStencilImage->width; + height = writeMSAAdepthStencilImage->height; + } + + if(bpp == 64) { - tileSizeW >>= 1; tileSizeH >>= 1; } - uint32_t widthInTiles, heightInTiles, width, height; - - if(writeImageIdx) + if(performResolve || writeMSAAimage || writeMSAAdepthStencilImage) { - if(getFormatBpp(writeImage->format) == 64) - { - tileSizeH >>= 1; - } - - width = writeImage->width; - height = writeImage->height; - } - else if(writeMSAAimageIdx) - { - if(getFormatBpp(writeMSAAimage->format) == 64) - { - tileSizeH >>= 1; - } - - width = writeMSAAimage->width; - height = writeMSAAimage->height; + tileSizeW >>= 1; + tileSizeH >>= 1; } widthInTiles = divRoundUp(width, tileSizeW); diff --git a/driver/common.h b/driver/common.h index 4c2abb4..061497e 100644 --- a/driver/common.h +++ b/driver/common.h @@ -335,32 +335,12 @@ typedef struct VkCommandBuffer_T VkCommandBufferUsageFlags usageFlags; _commandPool* cp; - VkRect2D renderArea; - _renderpass* renderpass; - _framebuffer* fbo; - uint32_t currentSubpass; + //State data _pipeline* graphicsPipeline; _pipeline* computePipeline; uint32_t numDrawCallsSubmitted; - uint32_t vertexBufferDirty; - uint32_t indexBufferDirty; - uint32_t viewportDirty; - uint32_t lineWidthDirty; - uint32_t depthBiasDirty; - uint32_t graphicsPipelineDirty; - uint32_t computePipelineDirty; - uint32_t subpassDirty; - uint32_t blendConstantsDirty; - uint32_t scissorDirty; - uint32_t depthBoundsDirty; - uint32_t stencilCompareMaskDirty; - uint32_t stencilWriteMaskDirty; - uint32_t stencilReferenceDirty; - uint32_t descriptorSetDirty; - uint32_t pushConstantDirty; - VkViewport viewport; VkRect2D scissor; float lineWidth; @@ -379,6 +359,24 @@ typedef struct VkCommandBuffer_T uint32_t indexBufferOffset; _buffer* indexBuffer; + + //dirty flags used to reduce command stream clutter + uint32_t vertexBufferDirty; + uint32_t indexBufferDirty; + uint32_t viewportDirty; + uint32_t lineWidthDirty; + uint32_t depthBiasDirty; + uint32_t graphicsPipelineDirty; + uint32_t computePipelineDirty; + uint32_t subpassDirty; + uint32_t blendConstantsDirty; + uint32_t scissorDirty; + uint32_t depthBoundsDirty; + uint32_t stencilCompareMaskDirty; + uint32_t stencilWriteMaskDirty; + uint32_t stencilReferenceDirty; + uint32_t descriptorSetDirty; + uint32_t pushConstantDirty; } _commandBuffer; typedef struct VkFence_T diff --git a/driver/draw.c b/driver/draw.c index 7a9223e..f7da2ad 100644 --- a/driver/draw.c +++ b/driver/draw.c @@ -8,8 +8,6 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer) assert(commandBuffer); _commandBuffer* cb = commandBuffer; - _renderpass* rp = cb->renderpass; - _framebuffer* fb = cb->fbo; //TODO handle cases when submitting >65k vertices in a VBO //TODO HW-2116 workaround diff --git a/driver/instance.c b/driver/instance.c index f0104d0..19006f4 100644 --- a/driver/instance.c +++ b/driver/instance.c @@ -164,6 +164,11 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateInstance( (*pInstance)->hasThreadedFs = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_THREADED_FS); (*pInstance)->hasMadvise = vc4_has_feature(controlFd, DRM_VC4_PARAM_SUPPORTS_MADVISE); + assert((*pInstance)->hasTiling); + assert((*pInstance)->hasControlFlow); + assert((*pInstance)->hasEtc1); + assert((*pInstance)->hasThreadedFs); + return VK_SUCCESS; } diff --git a/driver/renderpass.c b/driver/renderpass.c index 1f810b2..559436a 100644 --- a/driver/renderpass.c +++ b/driver/renderpass.c @@ -10,94 +10,199 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB assert(commandBuffer); assert(pRenderPassBegin); - - //TODO subpass contents ignored +// typedef struct VkRenderPassBeginInfo { +// VkStructureType sType; +// const void* pNext; +// VkRenderPass renderPass; +// VkFramebuffer framebuffer; +// VkRect2D renderArea; +// uint32_t clearValueCount; +// const VkClearValue* pClearValues; +// } VkRenderPassBeginInfo; _commandBuffer* cb = commandBuffer; - cb->fbo = pRenderPassBegin->framebuffer; - cb->renderpass = pRenderPassBegin->renderPass; - cb->renderArea = pRenderPassBegin->renderArea; + _renderpass* rp = pRenderPassBegin->renderPass; + _framebuffer* fb = pRenderPassBegin->framebuffer; - for(int c = 0; c < pRenderPassBegin->clearValueCount; ++c) + _image* writeImage = 0; + _image* readImage = 0; + _image* writeDepthStencilImage = 0; + _image* readDepthStencilImage = 0; + _image* writeMSAAimage = 0; + _image* writeMSAAdepthStencilImage = 0; + uint32_t performResolve = 0; + uint32_t readMSAAimage = 0; + uint32_t readMSAAdepthStencilImage = 0; + uint32_t flags = 0; + + //TODO handle multiple subpasses + //TODO subpass contents ignored + //TODO input attachments ignored + //TODO preserve attachments ignored + + //TODO handle lazily allocated memory + + if(rp->subpasses[0].colorAttachmentCount > 0) { - if(cb->renderpass->attachments[c].loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + if(rp->subpasses[0].pColorAttachments) { - if(!isDepthStencilFormat(cb->renderpass->attachments[c].format)) + if(rp->attachments[rp->subpasses[0].pColorAttachments[0].attachment].storeOp == VK_ATTACHMENT_STORE_OP_STORE) { - cb->fbo->attachmentViews[c].image->clearColor[0] = cb->fbo->attachmentViews[c].image->clearColor[1] = packVec4IntoABGR8(pRenderPassBegin->pClearValues[c].color.float32); + if(rp->attachments[rp->subpasses[0].pColorAttachments[0].attachment].samples > 1) + { + writeMSAAimage = fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].image; + } + else + { + writeImage = fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].image; + } + } + + if(rp->attachments[rp->subpasses[0].pColorAttachments[0].attachment].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) + { + readImage = fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].image; + + if(rp->attachments[rp->subpasses[0].pColorAttachments[0].attachment].samples > 1) + { + readMSAAimage = 1; + } + } + + if(rp->attachments[rp->subpasses[0].pColorAttachments[0].attachment].loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + { + flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; + + if(!rp->subpasses[0].pResolveAttachments) + { + fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].image->clearColor[0] = + fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].image->clearColor[1] = + packVec4IntoABGR8(pRenderPassBegin->pClearValues[rp->subpasses[0].pColorAttachments[0].attachment].color.float32); + } + else + { + fb->attachmentViews[rp->subpasses[0].pResolveAttachments[0].attachment].image->clearColor[0] = + fb->attachmentViews[rp->subpasses[0].pResolveAttachments[0].attachment].image->clearColor[1] = + packVec4IntoABGR8(pRenderPassBegin->pClearValues[rp->subpasses[0].pColorAttachments[0].attachment].color.float32); + } + } + } + + if(rp->subpasses[0].pResolveAttachments && + rp->attachments[rp->subpasses[0].pResolveAttachments[0].attachment].storeOp == VK_ATTACHMENT_STORE_OP_STORE) + { + writeImage = fb->attachmentViews[rp->subpasses[0].pResolveAttachments[0].attachment].image; + performResolve = 1; + } + } + + if(rp->subpasses[0].pDepthStencilAttachment) + { + if(rp->attachments[rp->subpasses[0].pDepthStencilAttachment->attachment].storeOp == VK_ATTACHMENT_STORE_OP_STORE || + rp->attachments[rp->subpasses[0].pDepthStencilAttachment->attachment].stencilStoreOp == VK_ATTACHMENT_STORE_OP_STORE) + { + if(rp->attachments[rp->subpasses[0].pDepthStencilAttachment->attachment].samples > 1) + { + writeMSAAdepthStencilImage = fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].image; } else { - //for combined depth/stencil images clearColor 0 is depth and 1 is stencil - cb->fbo->attachmentViews[c].image->clearColor[0] = (uint32_t)(pRenderPassBegin->pClearValues[c].depthStencil.depth * 0xffffff) & 0xffffff; + writeDepthStencilImage = fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].image; } } - if(isDepthStencilFormat(cb->renderpass->attachments[c].format) && cb->renderpass->attachments[c].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + if(rp->attachments[rp->subpasses[0].pDepthStencilAttachment->attachment].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD || + rp->attachments[rp->subpasses[0].pDepthStencilAttachment->attachment].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) { - cb->fbo->attachmentViews[c].image->clearColor[1] = pRenderPassBegin->pClearValues[c].depthStencil.stencil & 0xff; + readDepthStencilImage = fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].image; + + if(rp->attachments[rp->subpasses[0].pDepthStencilAttachment->attachment].samples > 1) + { + readMSAAdepthStencilImage = 1; + } + } + + if(rp->attachments[rp->subpasses[0].pDepthStencilAttachment->attachment].loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + { + fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].image->clearColor[0] = + (uint32_t)(pRenderPassBegin->pClearValues[rp->subpasses[0].pDepthStencilAttachment->attachment].depthStencil.depth * 0xffffff) & 0xffffff; + } + + if(rp->attachments[rp->subpasses[0].pDepthStencilAttachment->attachment].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + { + fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].image->clearColor[1] = + pRenderPassBegin->pClearValues[rp->subpasses[0].pDepthStencilAttachment->attachment].depthStencil.stencil & 0xff; } } - cb->currentSubpass = 0; - - _image* i = 0; - _image* MSAAimage = 0; - _image* dsI = 0; - - _renderpass* rp = pRenderPassBegin->renderPass; - - //TODO handle MSAA properly - if(!rp->subpasses[cb->currentSubpass].pResolveAttachments) - { - for(uint32_t c = 0; c < rp->subpasses[cb->currentSubpass].colorAttachmentCount; ++c) - { - i = cb->fbo->attachmentViews[rp->subpasses[cb->currentSubpass].pColorAttachments[c].attachment].image; - break; //TODO handle multiple attachments - } - } - else - { - for(uint32_t c = 0; c < rp->subpasses[cb->currentSubpass].colorAttachmentCount; ++c) - { - i = cb->fbo->attachmentViews[rp->subpasses[cb->currentSubpass].pResolveAttachments[c].attachment].image; - break; //TODO handle multiple attachments - } - - for(uint32_t c = 0; c < rp->subpasses[cb->currentSubpass].colorAttachmentCount; ++c) - { - MSAAimage = cb->fbo->attachmentViews[rp->subpasses[cb->currentSubpass].pColorAttachments[c].attachment].image; - break; //TODO handle multiple attachments - } - } - - if(rp->subpasses[cb->currentSubpass].pDepthStencilAttachment) - { - dsI = cb->fbo->attachmentViews[rp->subpasses[cb->currentSubpass].pDepthStencilAttachment->attachment].image; - } clFit(commandBuffer, &commandBuffer->binCl, sizeof(CLMarker)); - clInsertNewCLMarker(&commandBuffer->binCl, &cb->handlesCl, &cb->shaderRecCl, cb->shaderRecCount, &cb->uniformsCl, i, MSAAimage, dsI); + clInsertNewCLMarker(&commandBuffer->binCl, &cb->handlesCl, &cb->shaderRecCl, cb->shaderRecCount, &cb->uniformsCl, + writeImage, readImage, writeDepthStencilImage, readDepthStencilImage, writeMSAAimage, writeMSAAdepthStencilImage, + performResolve, readMSAAimage, readMSAAdepthStencilImage); - //insert reloc for render target - clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, i->boundMem->bo); + cb->binCl.currMarker->flags = flags; - //insert reloc for depth/stencil image - if(dsI) + //insert relocs + + if(writeImage) { clFit(commandBuffer, &commandBuffer->handlesCl, 4); - clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, dsI->boundMem->bo); + clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeImage->boundMem->bo); } - //TODO handle multiple attachments - for(uint32_t c = 0; c < cb->renderpass->numAttachments; ++c) + if(readImage) { - if(cb->renderpass->attachments[c].loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) - { - //TODO separate clear for color / depth / stencil? - cb->binCl.currMarker->flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; - } + clFit(commandBuffer, &commandBuffer->handlesCl, 4); + clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, readImage->boundMem->bo); + } + + if(writeDepthStencilImage) + { + clFit(commandBuffer, &commandBuffer->handlesCl, 4); + clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeDepthStencilImage->boundMem->bo); + } + + if(readDepthStencilImage) + { + clFit(commandBuffer, &commandBuffer->handlesCl, 4); + clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, readDepthStencilImage->boundMem->bo); + } + + if(writeMSAAimage) + { + clFit(commandBuffer, &commandBuffer->handlesCl, 4); + clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeMSAAimage->boundMem->bo); + } + + if(writeMSAAdepthStencilImage) + { + clFit(commandBuffer, &commandBuffer->handlesCl, 4); + clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo); + } + + uint32_t width = 0, height = 0, bpp = 0; + + if(writeImage) + { + width = writeImage->width; + height = writeImage->height; + bpp = getFormatBpp(writeImage->format); + } + else if(writeMSAAimage) + { + width = writeMSAAimage->width; + height = writeMSAAimage->height; + bpp = getFormatBpp(writeMSAAimage->format); + } + else if(writeDepthStencilImage) + { + width = writeDepthStencilImage->width; + height = writeDepthStencilImage->height; + } + else if(writeMSAAdepthStencilImage) + { + width = writeMSAAdepthStencilImage->width; + height = writeMSAAdepthStencilImage->height; } clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length); @@ -106,9 +211,9 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB 0, //tile allocation block size 0, //tile allocation initial block size 0, //auto initialize tile state data array - getFormatBpp(i->format) == 64, //64 bit color mode - MSAAimage ? 1 : 0, //TODO msaa - i->width, i->height, + bpp == 64, //64 bit color mode + writeMSAAimage || writeMSAAdepthStencilImage || performResolve ? 1 : 0, //msaa + width, height, 0, //tile state data array address 0, //tile allocation memory size 0); //tile allocation memory address @@ -366,7 +471,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdNextSubpass( //TODO contents, everything else... _commandBuffer* cb = commandBuffer; - cb->currentSubpass++; //TODO check max subpass? + //cb->currentSubpass++; //TODO check max subpass? } /* diff --git a/driver/stateChange.c b/driver/stateChange.c index 03445fc..58d57f9 100644 --- a/driver/stateChange.c +++ b/driver/stateChange.c @@ -96,7 +96,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdClearColorImage( { //Simplest case: just submit a job to clear the image clFit(commandBuffer, &commandBuffer->binCl, sizeof(CLMarker)); - clInsertNewCLMarker(&commandBuffer->binCl, &commandBuffer->handlesCl, &commandBuffer->shaderRecCl, commandBuffer->shaderRecCount, &commandBuffer->uniformsCl, i, 0, 0); + clInsertNewCLMarker(&commandBuffer->binCl, &commandBuffer->handlesCl, &commandBuffer->shaderRecCl, commandBuffer->shaderRecCount, &commandBuffer->uniformsCl, i, 0, 0, 0, 0, 0, 0, 0, 0); //insert reloc for render target clFit(commandBuffer, &commandBuffer->handlesCl, 4); diff --git a/test/MSAA/MSAA.cpp b/test/MSAA/MSAA.cpp index 5c2a51a..d3ca055 100644 --- a/test/MSAA/MSAA.cpp +++ b/test/MSAA/MSAA.cpp @@ -754,7 +754,7 @@ void CreateRenderPass() // Multisampled attachment that we render to attachDesc[0].format = swapchainFormat.format; attachDesc[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - attachDesc[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachDesc[0].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachDesc[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachDesc[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachDesc[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; @@ -765,7 +765,7 @@ void CreateRenderPass() // This is the frame buffer attachment to where the multisampled image // will be resolved to and which will be presented to the swapchain attachDesc[1].format = swapchainFormat.format; - attachDesc[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + attachDesc[1].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachDesc[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachDesc[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachDesc[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;