From 9348346ef856382dd9dde6f05b95136c03ea9cdc Mon Sep 17 00:00:00 2001 From: Unknown <0.tamas.marton@gmail.com> Date: Sun, 1 Mar 2020 19:11:31 +0000 Subject: [PATCH] made some progress with mipmapping --- driver/ControlListUtil.c | 34 ++------------------- driver/ControlListUtil.h | 17 +++++------ driver/command.c | 12 ++++---- driver/common.c | 12 ++++++++ driver/common.h | 2 ++ driver/draw.c | 2 +- driver/renderpass.c | 32 ++++++++++++++++++-- driver/resource.c | 55 +++++++++++++++++++++++++++++----- driver/stateChange.c | 3 +- test/mipmapping/mipmapping.cpp | 1 - 10 files changed, 109 insertions(+), 61 deletions(-) diff --git a/driver/ControlListUtil.c b/driver/ControlListUtil.c index 1029b72..bbc0427 100644 --- a/driver/ControlListUtil.c +++ b/driver/ControlListUtil.c @@ -44,16 +44,7 @@ void clInsertNewCLMarker(ControlList* cl, ControlList* handlesCL, ControlList* shaderRecCL, uint32_t shaderRecCount, - ControlList* uniformsCL, - void* writeImagePtr, - void* readImagePtr, - void* writeDepthStencilImagePtr, - void* readDepthStencilImagePtr, - void* writeMSAAimagePtr, - void* writeMSAAdepthStencilImagePtr, - uint32_t performResolve, - uint32_t readMSAAimage, - uint32_t readMSAAdepthStencilImage) + ControlList* uniformsCL) { //to be inserted when you'd insert tile binning mode config assert(cl); @@ -61,28 +52,7 @@ void clInsertNewCLMarker(ControlList* cl, assert(shaderRecCL); assert(uniformsCL); - CLMarker marker; - marker.nextMarker = 0; - marker.size = 0; - marker.writeImage = writeImagePtr; - marker.readImage = readImagePtr; - marker.writeDepthStencilImage = writeDepthStencilImagePtr; - marker.readDepthStencilImage = readDepthStencilImagePtr; - marker.writeMSAAimage = writeMSAAimagePtr; - marker.writeMSAAdepthStencilImage = writeMSAAdepthStencilImagePtr; - marker.performResolve = performResolve; - marker.readMSAAimage = readMSAAimage; - marker.readMSAAdepthStencilImage = readMSAAdepthStencilImage; - marker.perfmonID = 0; - marker.clearColor[0] = 0; - 
marker.clearColor[1] = 0; - marker.clearDepth = 0; - marker.clearStencil = 0; - marker.handlesSize = 0; - marker.shaderRecSize = 0; - marker.uniformsSize = 0; - marker.shaderRecCount = 0; - marker.flags = 0; + CLMarker marker = {}; marker.handlesBuf = handlesCL->buffer; marker.shaderRecBuf = shaderRecCL->buffer; marker.uniformsBuf = uniformsCL->buffer; diff --git a/driver/ControlListUtil.h b/driver/ControlListUtil.h index acb8733..69e41e0 100644 --- a/driver/ControlListUtil.h +++ b/driver/ControlListUtil.h @@ -23,6 +23,12 @@ typedef struct CLMarker void* readDepthStencilImage; void* writeMSAAimage; void* writeMSAAdepthStencilImage; + uint32_t writeImageOffset; + uint32_t readImageOffset; + uint32_t writeDepthStencilImageOffset; + uint32_t readDepthStencilImageOffset; + uint32_t writeMSAAimageOffset; + uint32_t writeMSAAdepthStencilImageOffset; uint32_t flags; //used to store clear flag etc. uint32_t performResolve; uint32_t readMSAAimage; @@ -71,16 +77,7 @@ void clInsertNewCLMarker(ControlList* cl, ControlList* handlesCL, ControlList* shaderRecCL, uint32_t shaderRecCount, - ControlList* uniformsCL, - void* writeImagePtr, - void* readImagePtr, - void* writeDepthStencilImagePtr, - void* readDepthStencilImagePtr, - void* writeMSAAimagePtr, - void* writeMSAAdepthStencilImagePtr, - uint32_t performResolve, - uint32_t readMSAAimage, - uint32_t readMSAAdepthStencilImage); + ControlList* uniformsCL); void clCloseCurrentMarker(ControlList* cl, ControlList* handlesCL, ControlList* shaderRecCL, uint32_t shaderRecCount, ControlList* uniformsCL); void clInsertData(ControlList* cl, uint32_t size, uint8_t* data); void clInsertUniformConstant(ControlList* cl, uint32_t data); diff --git a/driver/command.c b/driver/command.c index 504c537..9eecd61 100644 --- a/driver/command.c +++ b/driver/command.c @@ -355,7 +355,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( if(writeImage) { submitCl.color_write.hindex = writeImageIdx; - submitCl.color_write.offset = 0; + 
submitCl.color_write.offset = marker->writeImageOffset; submitCl.color_write.flags = 0; submitCl.color_write.bits = VC4_SET_FIELD(getRenderTargetFormatVC4(writeImage->format), VC4_RENDER_CONFIG_FORMAT) | @@ -370,7 +370,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( if(writeMSAAimage) { submitCl.msaa_color_write.hindex = writeMSAAimageIdx; - submitCl.msaa_color_write.offset = 0; + submitCl.msaa_color_write.offset = marker->writeMSAAimageOffset; submitCl.msaa_color_write.flags = 0; submitCl.msaa_color_write.bits = VC4_RENDER_CONFIG_MS_MODE_4X; } @@ -378,7 +378,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( if(readImage) { submitCl.color_read.hindex = readImageIdx; - submitCl.color_read.offset = 0; + submitCl.color_read.offset = marker->readImageOffset; submitCl.color_read.flags = readMSAAimage ? VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES : 0; submitCl.color_read.bits = VC4_SET_FIELD(getRenderTargetFormatVC4(readImage->format), VC4_RENDER_CONFIG_FORMAT) | VC4_SET_FIELD(readImage->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); @@ -387,7 +387,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( if(writeDepthStencilImage) { submitCl.zs_write.hindex = writeDepthStencilImageIdx; - submitCl.zs_write.offset = 0; + submitCl.zs_write.offset = marker->writeDepthStencilImageOffset; submitCl.zs_write.flags = 0; submitCl.zs_write.bits = VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, VC4_LOADSTORE_TILE_BUFFER_BUFFER) | VC4_SET_FIELD(writeDepthStencilImage->tiling, VC4_LOADSTORE_TILE_BUFFER_TILING); @@ -396,7 +396,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( if(writeMSAAdepthStencilImage) { submitCl.msaa_zs_write.hindex = writeMSAAdepthStencilImageIdx; - submitCl.msaa_zs_write.offset = 0; + submitCl.msaa_zs_write.offset = marker->writeMSAAdepthStencilImageOffset; submitCl.msaa_zs_write.flags = 0; submitCl.msaa_zs_write.bits = VC4_RENDER_CONFIG_MS_MODE_4X; } @@ -404,7 +404,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( if(readDepthStencilImage) { 
//Round n up to the nearest power of two.
//
//Returns the smallest power of two that is greater than or equal to n.
//An input of 0 yields 1 (the smallest power of two); without the explicit
//check the decrement below would wrap around to 0xFFFFFFFF and the function
//would hand back 0, which is not a power of two.
//
//Inputs above 0x80000000 have no representable 32 bit result: the final
//increment wraps around and 0 is returned.
uint32_t getPow2Pad(uint32_t n)
{
	if(n == 0)
	{
		return 1;
	}

	//step back by one so that exact powers of two map to themselves
	n--;

	//smear the highest set bit into every lower bit position
	n |= n >> 1;
	n |= n >> 2;
	n |= n >> 4;
	n |= n >> 8;
	n |= n >> 16;

	//every bit below the top one is set now, so adding one carries
	//into the next power of two
	return n + 1;
}
drawCommon(VkCommandBuffer commandBuffer) getTextureDataType(di->imageView->interpretedFormat), di->imageView->viewType == VK_IMAGE_VIEW_TYPE_CUBE, 0, //TODO cubemap stride - 0, //TODO texture base ptr + di->imageView->image->levelOffsets[0] >> 12, //Image level 0 offset in multiples of 4KB di->imageView->image->height & 2047, di->imageView->image->width & 2047, getMinFilterType(di->sampler->minFilter, di->sampler->mipmapMode, di->sampler->maxLod), diff --git a/driver/renderpass.c b/driver/renderpass.c index 2374829..525e490 100644 --- a/driver/renderpass.c +++ b/driver/renderpass.c @@ -34,6 +34,12 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB uint32_t readMSAAimage = 0; uint32_t readMSAAdepthStencilImage = 0; uint32_t flags = 0; + uint32_t writeImageOffset = 0; + uint32_t readImageOffset = 0; + uint32_t writeDepthStencilImageOffset = 0; + uint32_t readDepthStencilImageOffset = 0; + uint32_t writeMSAAimageOffset = 0; + uint32_t writeMSAAdepthStencilImageOffset = 0; //TODO handle multiple subpasses //TODO subpass contents ignored @@ -51,16 +57,19 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB if(rp->attachments[rp->subpasses[0].pColorAttachments[0].attachment].samples > 1) { writeMSAAimage = fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].image; + writeMSAAimageOffset = fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].image->levelOffsets[fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].subresourceRange.baseMipLevel]; } else { writeImage = fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].image; + writeImageOffset = fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].image->levelOffsets[fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].subresourceRange.baseMipLevel]; } } if(rp->attachments[rp->subpasses[0].pColorAttachments[0].attachment].loadOp == 
VK_ATTACHMENT_LOAD_OP_LOAD) { readImage = fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].image; + readImageOffset = fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].image->levelOffsets[fb->attachmentViews[rp->subpasses[0].pColorAttachments[0].attachment].subresourceRange.baseMipLevel]; if(rp->attachments[rp->subpasses[0].pColorAttachments[0].attachment].samples > 1) { @@ -73,6 +82,7 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB rp->attachments[rp->subpasses[0].pResolveAttachments[0].attachment].storeOp == VK_ATTACHMENT_STORE_OP_STORE) { writeImage = fb->attachmentViews[rp->subpasses[0].pResolveAttachments[0].attachment].image; + writeImageOffset = fb->attachmentViews[rp->subpasses[0].pResolveAttachments[0].attachment].image->levelOffsets[fb->attachmentViews[rp->subpasses[0].pResolveAttachments[0].attachment].subresourceRange.baseMipLevel]; performResolve = 1; } } @@ -85,10 +95,12 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB if(rp->attachments[rp->subpasses[0].pDepthStencilAttachment->attachment].samples > 1) { writeMSAAdepthStencilImage = fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].image; + writeMSAAdepthStencilImageOffset = fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].image->levelOffsets[fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].subresourceRange.baseMipLevel]; } else { writeDepthStencilImage = fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].image; + writeDepthStencilImageOffset = fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].image->levelOffsets[fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].subresourceRange.baseMipLevel]; } } @@ -96,6 +108,7 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB 
rp->attachments[rp->subpasses[0].pDepthStencilAttachment->attachment].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) { readDepthStencilImage = fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].image; + readDepthStencilImageOffset = fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].image->levelOffsets[fb->attachmentViews[rp->subpasses[0].pDepthStencilAttachment->attachment].subresourceRange.baseMipLevel]; if(rp->attachments[rp->subpasses[0].pDepthStencilAttachment->attachment].samples > 1) { @@ -106,9 +119,22 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB clFit(commandBuffer, &commandBuffer->binCl, sizeof(CLMarker)); - clInsertNewCLMarker(&commandBuffer->binCl, &cb->handlesCl, &cb->shaderRecCl, cb->shaderRecCount, &cb->uniformsCl, - writeImage, readImage, writeDepthStencilImage, readDepthStencilImage, writeMSAAimage, writeMSAAdepthStencilImage, - performResolve, readMSAAimage, readMSAAdepthStencilImage); + clInsertNewCLMarker(&commandBuffer->binCl, &cb->handlesCl, &cb->shaderRecCl, cb->shaderRecCount, &cb->uniformsCl); + commandBuffer->binCl.currMarker->writeImage = writeImage; + commandBuffer->binCl.currMarker->writeImageOffset = writeImageOffset; + commandBuffer->binCl.currMarker->readImage = readImage; + commandBuffer->binCl.currMarker->readImageOffset = readImageOffset; + commandBuffer->binCl.currMarker->writeDepthStencilImage = writeDepthStencilImage; + commandBuffer->binCl.currMarker->writeDepthStencilImageOffset = writeDepthStencilImageOffset; + commandBuffer->binCl.currMarker->readDepthStencilImage = readDepthStencilImage; + commandBuffer->binCl.currMarker->readDepthStencilImageOffset = readDepthStencilImageOffset; + commandBuffer->binCl.currMarker->writeMSAAimage = writeMSAAimage; + commandBuffer->binCl.currMarker->writeMSAAimageOffset = writeMSAAimageOffset; + commandBuffer->binCl.currMarker->writeMSAAdepthStencilImage = writeMSAAdepthStencilImage; + 
commandBuffer->binCl.currMarker->writeMSAAdepthStencilImageOffset = writeMSAAdepthStencilImageOffset; + commandBuffer->binCl.currMarker->performResolve = performResolve; + commandBuffer->binCl.currMarker->readMSAAimage = readMSAAimage; + commandBuffer->binCl.currMarker->readMSAAdepthStencilImage = readMSAAdepthStencilImage; if(rp->subpasses[0].colorAttachmentCount > 0) { diff --git a/driver/resource.c b/driver/resource.c index ea1c21d..b857862 100644 --- a/driver/resource.c +++ b/driver/resource.c @@ -204,6 +204,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateImage( i->paddedWidth = 0; //when format is T i->paddedHeight = 0; i->miplevels = pCreateInfo->mipLevels; + memset(i->levelOffsets, 0, sizeof(uint32_t) * 11); i->samples = pCreateInfo->samples; i->layers = pCreateInfo->arrayLayers; i->size = 0; @@ -277,23 +278,63 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkGetImageMemoryRequirements( uint32_t bpp = getFormatBpp(i->format); uint32_t nonPaddedSize = (i->width * i->height * bpp) >> 3; - i->paddedWidth = i->width; - i->paddedHeight = i->height; - //TODO take into account tiling etc. 
- - //need to pad to T format, as HW automatically chooses that if(nonPaddedSize > 4096) { + //need to pad to T format, as HW automatically chooses that getPaddedTextureDimensionsT(i->width, i->height, bpp, &i->paddedWidth, &i->paddedHeight); } + else + { + //LT format + i->paddedWidth = i->width; + i->paddedHeight = i->height; + } + + uint32_t mipSize = 0; + + //TODO make sure this works properly + for(uint32_t c = 1; c < i->miplevels; ++c) + { + uint32_t mipWidth = max(i->width >> c, 1); + uint32_t mipHeight = max(i->height >> c, 1); + uint32_t mipNonPaddedSize = (mipWidth * mipHeight * bpp) >> 3; + uint32_t mipPaddedWidth, mipPaddedHeight; + + if(mipNonPaddedSize > 4096) + { + //T format + getPaddedTextureDimensionsT(mipWidth, mipHeight, bpp, &mipPaddedWidth, &mipPaddedHeight); + } + else + { + //LT format + mipPaddedWidth = mipWidth; + mipPaddedHeight = mipHeight; + } + + mipPaddedWidth= getPow2Pad(mipPaddedWidth); + mipPaddedHeight = getPow2Pad(mipPaddedHeight); + + //TODO + //i->levelOffsets[c] = ?? + + mipSize += mipPaddedWidth * mipPaddedHeight; + } + + i->levelOffsets[0] = (mipSize * bpp) >> 3; //TODO does this need to be aligned? 
- i->size = getBOAlignedSize((i->paddedWidth * i->paddedHeight * bpp) >> 3, ARM_PAGE_SIZE); + i->size = getBOAlignedSize(((i->paddedWidth * i->paddedHeight + mipSize) * bpp) >> 3, ARM_PAGE_SIZE); i->stride = (i->paddedWidth * bpp) >> 3; +// fprintf(stderr, "i->levelOffsets[0] %u\n", i->levelOffsets[0]); +// fprintf(stderr, "i->size %u\n", i->size); +// fprintf(stderr, "mipSize %u\n", mipSize); +// fprintf(stderr, "bpp %u\n", bpp); + pMemoryRequirements->alignment = ARM_PAGE_SIZE; - pMemoryRequirements->memoryTypeBits = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; //TODO + pMemoryRequirements->memoryTypeBits = memoryTypes[0].propertyFlags; //TODO pMemoryRequirements->size = i->size; } diff --git a/driver/stateChange.c b/driver/stateChange.c index 1285d9c..defed55 100644 --- a/driver/stateChange.c +++ b/driver/stateChange.c @@ -96,7 +96,8 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdClearColorImage( { //Simplest case: just submit a job to clear the image clFit(commandBuffer, &commandBuffer->binCl, sizeof(CLMarker)); - clInsertNewCLMarker(&commandBuffer->binCl, &commandBuffer->handlesCl, &commandBuffer->shaderRecCl, commandBuffer->shaderRecCount, &commandBuffer->uniformsCl, i, 0, 0, 0, 0, 0, 0, 0, 0); + clInsertNewCLMarker(&commandBuffer->binCl, &commandBuffer->handlesCl, &commandBuffer->shaderRecCl, commandBuffer->shaderRecCount, &commandBuffer->uniformsCl); + commandBuffer->binCl.currMarker->writeImage = i; //insert reloc for render target clFit(commandBuffer, &commandBuffer->handlesCl, 4); diff --git a/test/mipmapping/mipmapping.cpp b/test/mipmapping/mipmapping.cpp index 0034f5c..1be0312 100644 --- a/test/mipmapping/mipmapping.cpp +++ b/test/mipmapping/mipmapping.cpp @@ -1530,7 +1530,6 @@ void CreateTexture() vkFreeCommandBuffers(device, commandPool, 1, &mipgenCommandBuffer); } - { //create sampler for sampling texture VkImageViewCreateInfo view = {}; view.sType = 
VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;