diff --git a/driver/ControlListUtil.c b/driver/ControlListUtil.c index bbc0427..400df6c 100644 --- a/driver/ControlListUtil.c +++ b/driver/ControlListUtil.c @@ -530,18 +530,25 @@ void clInsertClipWindow(ControlList* cl, *(uint32_t*)cl->nextFreeByte = moveBits(width, 16, 0) | moveBits(height, 16, 16); cl->nextFreeByte += 4; } +uint16_t get16bitSignedFixedNumber(float x) +{ + int32_t integerPart = roundf(x * 16.0f); + return integerPart & 0xffff; +} + //viewport centre x/y coordinate void clInsertViewPortOffset(ControlList* cl, - int16_t x, //sint16 - int16_t y //sint16 + float x, + float y ) { assert(cl); assert(cl->buffer); assert(cl->nextFreeByte); *cl->nextFreeByte = V3D21_VIEWPORT_OFFSET_opcode; cl->nextFreeByte++; - *(int16_t*)cl->nextFreeByte = x * 16; cl->nextFreeByte += 2; - *(int16_t*)cl->nextFreeByte = y * 16; cl->nextFreeByte += 2; + //expects 16 bit signed fixed point number with 4 fractional bits + *(uint16_t*)cl->nextFreeByte = get16bitSignedFixedNumber(x); cl->nextFreeByte += 2; + *(uint16_t*)cl->nextFreeByte = get16bitSignedFixedNumber(y); cl->nextFreeByte += 2; } void clInsertZMinMaxClippingPlanes(ControlList* cl, diff --git a/driver/ControlListUtil.h b/driver/ControlListUtil.h index 0b57f25..db3c97c 100644 --- a/driver/ControlListUtil.h +++ b/driver/ControlListUtil.h @@ -37,6 +37,7 @@ typedef struct CLMarker uint32_t clearColor[2]; uint32_t clearDepth, clearStencil; uint32_t width, height; //render w/h + uint32_t renderToMip; //pointers that point to where all the other CL data is //plus sizes @@ -144,8 +145,8 @@ void clInsertClipWindow(ControlList* cl, uint32_t bottomPixelCoord, //uint16 uint32_t leftPixelCoord); //uint16 void clInsertViewPortOffset(ControlList* cl, - int16_t x, //sint16 - int16_t y //sint16 + float x, + float y ); void clInsertZMinMaxClippingPlanes(ControlList* cl, float minZw, diff --git a/driver/command.c b/driver/command.c index 798705e..f1398c0 100644 --- a/driver/command.c +++ b/driver/command.c @@ -357,12 +357,21 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( //fill out submit cl fields if(writeImage) { + uint32_t nonPaddedSize = (marker->width * marker->height * getFormatBpp(writeImage->format)) >> 3; + + uint32_t tiling = writeImage->tiling; + + if(writeImage->tiling == VC4_TILING_FORMAT_T && nonPaddedSize <= 4096) + { + tiling = VC4_TILING_FORMAT_LT; + } + submitCl.color_write.hindex = writeImageIdx; submitCl.color_write.offset = marker->writeImageOffset; submitCl.color_write.flags = 0; submitCl.color_write.bits = VC4_SET_FIELD(getRenderTargetFormatVC4(writeImage->format), VC4_RENDER_CONFIG_FORMAT) | - VC4_SET_FIELD(writeImage->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); + VC4_SET_FIELD(tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); if(performResolve) { @@ -380,20 +389,38 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( if(readImage) { + uint32_t nonPaddedSize = (marker->width * marker->height * getFormatBpp(readImage->format)) >> 3; + + uint32_t tiling = readImage->tiling; + + if(readImage->tiling == VC4_TILING_FORMAT_T && nonPaddedSize <= 4096) + { + tiling = VC4_TILING_FORMAT_LT; + } + submitCl.color_read.hindex = readImageIdx; submitCl.color_read.offset = marker->readImageOffset; submitCl.color_read.flags = readMSAAimage ? VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES : 0; submitCl.color_read.bits = VC4_SET_FIELD(getRenderTargetFormatVC4(readImage->format), VC4_RENDER_CONFIG_FORMAT) | - VC4_SET_FIELD(readImage->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); + VC4_SET_FIELD(tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); } if(writeDepthStencilImage) { + uint32_t nonPaddedSize = (marker->width * marker->height * getFormatBpp(writeDepthStencilImage->format)) >> 3; + + uint32_t tiling = writeDepthStencilImage->tiling; + + if(writeDepthStencilImage->tiling == VC4_TILING_FORMAT_T && nonPaddedSize <= 4096) + { + tiling = VC4_TILING_FORMAT_LT; + } + submitCl.zs_write.hindex = writeDepthStencilImageIdx; submitCl.zs_write.offset = marker->writeDepthStencilImageOffset; submitCl.zs_write.flags = 0; submitCl.zs_write.bits = VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, VC4_LOADSTORE_TILE_BUFFER_BUFFER) | - VC4_SET_FIELD(writeDepthStencilImage->tiling, VC4_LOADSTORE_TILE_BUFFER_TILING); + VC4_SET_FIELD(tiling, VC4_LOADSTORE_TILE_BUFFER_TILING); } if(writeMSAAdepthStencilImage) @@ -406,11 +433,20 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( if(readDepthStencilImage) { + uint32_t nonPaddedSize = (marker->width * marker->height * getFormatBpp(readDepthStencilImage->format)) >> 3; + + uint32_t tiling = readDepthStencilImage->tiling; + + if(readDepthStencilImage->tiling == VC4_TILING_FORMAT_T && nonPaddedSize <= 4096) + { + tiling = VC4_TILING_FORMAT_LT; + } + submitCl.zs_read.hindex = readDepthStencilImageIdx; submitCl.zs_read.offset = marker->readDepthStencilImageOffset; submitCl.zs_read.flags = readMSAAdepthStencilImage ? VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES : 0; //TODO is this valid? submitCl.zs_read.bits = VC4_SET_FIELD(getRenderTargetFormatVC4(readDepthStencilImage->format), VC4_RENDER_CONFIG_FORMAT) | - VC4_SET_FIELD(readDepthStencilImage->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); + VC4_SET_FIELD(tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); } submitCl.clear_color[0] = marker->clearColor[0]; @@ -460,6 +496,13 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( widthInTiles = divRoundUp(width, tileSizeW); heightInTiles = divRoundUp(height, tileSizeH); + //pad width if rendering to miplevel + if(marker->renderToMip) + { + width = getPow2Pad(width); + width = width < 4 ? 4 : width; + } + submitCl.max_x_tile = widthInTiles - 1; submitCl.max_y_tile = heightInTiles - 1; submitCl.width = width; diff --git a/driver/draw.c b/driver/draw.c index 64609df..a65ff98 100644 --- a/driver/draw.c +++ b/driver/draw.c @@ -71,7 +71,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer) //Viewport Offset clFit(commandBuffer, &commandBuffer->binCl, V3D21_VIEWPORT_OFFSET_length); - clInsertViewPortOffset(&commandBuffer->binCl, ((int16_t)vp.width) >> 1, ((int16_t)vp.height) >> 1); + clInsertViewPortOffset(&commandBuffer->binCl, vp.width * 0.5f, vp.height * 0.5f); cb->viewportDirty = 0; } diff --git a/driver/renderpass.c b/driver/renderpass.c index b38e79d..2ef5742 100644 --- a/driver/renderpass.c +++ b/driver/renderpass.c @@ -229,6 +229,23 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB bpp = getFormatBpp(writeMSAAimage->format); } + uint32_t biggestMip = 0; + for(uint32_t c = 0; c < fb->numAttachmentViews; ++c) + { + biggestMip = max(biggestMip, fb->attachmentViews[c].subresourceRange.baseMipLevel); + } + + //pad render size if we are rendering to a mip level + cb->binCl.currMarker->renderToMip = biggestMip > 0; + + uint32_t width = cb->binCl.currMarker->width; + + if(cb->binCl.currMarker->renderToMip) + { + width = getPow2Pad(width); + width = width < 4 ? 4 : width; + } + clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length); clInsertTileBinningModeConfiguration(&commandBuffer->binCl, 0, //double buffer in non ms mode @@ -237,7 +254,7 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB 0, //auto initialize tile state data array bpp == 64, //64 bit color mode writeMSAAimage || writeMSAAdepthStencilImage || performResolve ? 1 : 0, //msaa - cb->binCl.currMarker->width, cb->binCl.currMarker->height, + width, cb->binCl.currMarker->height, 0, //tile state data array address 0, //tile allocation memory size 0); //tile allocation memory address diff --git a/driver/stateChange.c b/driver/stateChange.c index defed55..ea79e49 100644 --- a/driver/stateChange.c +++ b/driver/stateChange.c @@ -135,6 +135,9 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdClearColorImage( commandBuffer->binCl.currMarker->clearColor[0] = commandBuffer->binCl.currMarker->clearColor[1] = packVec4IntoABGR8(pColor->float32); commandBuffer->binCl.currMarker->flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; + + commandBuffer->binCl.currMarker->width = i->width; + commandBuffer->binCl.currMarker->height = i->height; } } diff --git a/test/depthTex/depthTex.cpp b/test/depthTex/depthTex.cpp index 2d4744c..a91ad2e 100644 --- a/test/depthTex/depthTex.cpp +++ b/test/depthTex/depthTex.cpp @@ -1368,7 +1368,7 @@ void CreateTexture() VkBufferImageCopy bufferCopyRegion = {}; bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - bufferCopyRegion.imageSubresource.mipLevel = 1; + bufferCopyRegion.imageSubresource.mipLevel = 0; bufferCopyRegion.imageSubresource.baseArrayLayer = 0; bufferCopyRegion.imageSubresource.layerCount = 1; bufferCopyRegion.imageExtent.width = width;