diff --git a/driver/common.c b/driver/common.c
index 855c4c9..cb295c1 100644
--- a/driver/common.c
+++ b/driver/common.c
@@ -209,6 +209,7 @@ int findDeviceExtension(char* name)
 //Textures in T format:
 //formed out of 4KB tiles, which have 1KB subtiles (see page 105 in VC4 arch guide)
 //1KB subtiles have 512b microtiles.
+//Textures in LT format consist of 512b microtiles linearly laid out
 //Width/height of the 512b microtiles is the following:
 // 64bpp: 2x4
 // 32bpp: 4x4
@@ -230,62 +231,60 @@ int findDeviceExtension(char* name)
 // 8bpp: 64x64
 // 4bpp: 128x64
 // 1bpp: 256x128
-void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp, uint32_t* paddedWidth, uint32_t* paddedHeight)
+void getUTileDimensions(uint32_t bpp, uint32_t* tileW, uint32_t* tileH) //writes the 512b microtile width/height for the given bpp
 {
-	assert(paddedWidth);
-	assert(paddedHeight);
-	uint32_t tileW = 0;
-	uint32_t tileH = 0;
+	assert(tileW);
+	assert(tileH);
 
 	switch(bpp)
 	{
 	case 256:
 	{
-		tileW = 8;
-		tileH = 16;
+		*tileW = 1;
+		*tileH = 2;
 		break;
 	}
 	case 128:
 	{
-		tileW = 16;
-		tileH = 16;
+		*tileW = 2;
+		*tileH = 2;
 		break;
 	}
 	case 64:
 	{
-		tileW = 16;
-		tileH = 32;
+		*tileW = 2;
+		*tileH = 4;
 		break;
 	}
 	case 32:
 	case 24: //TODO
 	{
-		tileW = 32;
-		tileH = 32;
+		*tileW = 4;
+		*tileH = 4;
 		break;
 	}
 	case 16:
 	{
-		tileW = 64;
-		tileH = 32;
+		*tileW = 8;
+		*tileH = 4;
 		break;
 	}
 	case 8:
 	{
-		tileW = 64;
-		tileH = 64;
+		*tileW = 8;
+		*tileH = 8;
 		break;
 	}
 	case 4:
 	{
-		tileW = 128;
-		tileH = 64;
+		*tileW = 16;
+		*tileH = 8;
 		break;
 	}
 	case 1:
 	{
-		tileW = 256;
-		tileH = 128;
+		*tileW = 32;
+		*tileH = 16;
 		break;
 	}
 	default:
@@ -294,9 +293,23 @@ void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp,
 		assert(!"Unsupported texture bpp.");
 	}
 	}
+}
 
-	*paddedWidth = ((tileW - (width % tileW)) % tileW) + width;
-	*paddedHeight = ((tileH - (height % tileH)) % tileH) + height;
+uint32_t roundUp(uint32_t numToRound, uint32_t multiple) //rounds numToRound up to a multiple of `multiple`; a 0 multiple returns it unchanged
+{
+	if(!multiple)
+	{
+		return numToRound;
+	}
+
+	uint32_t remainder = numToRound % multiple;
+
+	if(!remainder)
+	{
+		return numToRound;
+	}
+
+	return numToRound + multiple - remainder;
 }
 
 /*static inline void util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc)
diff --git a/driver/common.h b/driver/common.h
index a30ddc3..783a30f 100644
--- a/driver/common.h
+++ b/driver/common.h
@@ -196,7 +196,7 @@ typedef struct VkImage_T
 	uint32_t usageBits;
 	uint32_t format;
 	uint32_t imageSpace;
-	uint32_t tiling; //T or LT
+	uint32_t tiling; //Linear or T or LT
 	uint32_t layout;
 	_deviceMemory* boundMem;
 	uint32_t boundOffset;
@@ -512,7 +512,8 @@ uint32_t packVec4IntoABGR8(const float rgba[4]);
 void createImageBO(_image* i);
 int findInstanceExtension(char* name);
 int findDeviceExtension(char* name);
-void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp, uint32_t* paddedWidth, uint32_t* paddedHeight);
+void getUTileDimensions(uint32_t bpp, uint32_t* tileW, uint32_t* tileH);
+uint32_t roundUp(uint32_t numToRound, uint32_t multiple);
 int isDepthStencilFormat(VkFormat format);
 uint32_t getCompareOp(VkCompareOp op);
 uint32_t getStencilOp(VkStencilOp op);
diff --git a/driver/resource.c b/driver/resource.c
index b857862..012a639 100644
--- a/driver/resource.c
+++ b/driver/resource.c
@@ -212,7 +212,23 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateImage(
 	i->usageBits = pCreateInfo->usage;
 	i->format = pCreateInfo->format;
 	i->imageSpace = 0;
-	i->tiling = pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR ? VC4_TILING_FORMAT_LT : VC4_TILING_FORMAT_T;
+	uint32_t nonPaddedSize = (i->width * i->height * getFormatBpp(i->format)) >> 3;
+	i->tiling = 0;
+	if(pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
+	{
+		i->tiling = VC4_TILING_FORMAT_LINEAR;
+	}
+	else
+	{
+		if(nonPaddedSize > 4096)
+		{
+			i->tiling = VC4_TILING_FORMAT_T;
+		}
+		else
+		{
+			i->tiling = VC4_TILING_FORMAT_LT;
+		}
+	}
 	i->layout = pCreateInfo->initialLayout;
 	i->boundMem = 0;
 	i->boundOffset = 0;
@@ -277,59 +293,110 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkGetImageMemoryRequirements(
 	_image* i = image;
 
 	uint32_t bpp = getFormatBpp(i->format);
-	uint32_t nonPaddedSize = (i->width * i->height * bpp) >> 3;
+	uint32_t utileW = 0, utileH = 0; //stay 0 (roundUp then leaves sizes unpadded) if bpp is unsupported
+	getUTileDimensions(bpp, &utileW, &utileH);
 
-	if(nonPaddedSize > 4096)
+	switch(i->tiling)
 	{
-		//need to pad to T format, as HW automatically chooses that
-		getPaddedTextureDimensionsT(i->width, i->height, bpp, &i->paddedWidth, &i->paddedHeight);
-	}
-	else
-	{
-		//LT format
-		i->paddedWidth = i->width;
-		i->paddedHeight = i->height;
+	case VC4_TILING_FORMAT_T:
+	{
+		//need to pad to T format, as HW automatically chooses that
+		i->paddedWidth = roundUp(i->width, utileW * 8);
+		i->paddedHeight = roundUp(i->height, utileH * 8);
+		break;
+	}
+	case VC4_TILING_FORMAT_LT:
+	{
+		//LT format
+		i->paddedWidth = roundUp(i->width, utileW);
+		i->paddedHeight = roundUp(i->height, utileH);
+		break;
+	}
+	case VC4_TILING_FORMAT_LINEAR:
+	{
+		//linear format
+		i->paddedWidth = roundUp(i->width, utileW);
+		i->paddedHeight = i->height;
+		break;
+	}
 	}
 
+	i->stride = (i->paddedWidth * bpp) >> 3;
+
 	uint32_t mipSize = 0;
 
-	//TODO make sure this works properly
-	for(uint32_t c = 1; c < i->miplevels; ++c)
+	//mip levels are laid out in memory the following way:
+	//0x0.................................................0xffffff
+	//smallest mip level ... largest mip level - 1, base mip level
+	//base mip level offset must be a multiple of 4KB
+	//mip levels other than the base must be aligned to power of two sizes
+	//mip levels must be padded to either T or LT format depending on size
+
+	uint32_t prevMipPaddedSize = 0;
+
+	for(uint32_t c = i->miplevels; c-- > 1;) //visits miplevels-1 down to 1; does not wrap around when miplevels is 0
 	{
 		uint32_t mipWidth = max(i->width >> c, 1);
 		uint32_t mipHeight = max(i->height >> c, 1);
-		uint32_t mipNonPaddedSize = (mipWidth * mipHeight * bpp) >> 3;
+		uint32_t nonPaddedSize = (mipWidth * mipHeight * bpp) >> 3;
 		uint32_t mipPaddedWidth, mipPaddedHeight;
 
-		if(mipNonPaddedSize > 4096)
+		uint32_t tiling = i->tiling;
+
+		if(i->tiling == VC4_TILING_FORMAT_T && nonPaddedSize <= 4096)
 		{
-			//T format
-			getPaddedTextureDimensionsT(mipWidth, mipHeight, bpp, &mipPaddedWidth, &mipPaddedHeight);
-		}
-		else
-		{
-			//LT format
-			mipPaddedWidth = mipWidth;
-			mipPaddedHeight = mipHeight;
+			tiling = VC4_TILING_FORMAT_LT;
 		}
 
-		mipPaddedWidth= getPow2Pad(mipPaddedWidth);
-		mipPaddedHeight = getPow2Pad(mipPaddedHeight);
+		switch(tiling)
+		{
+		case VC4_TILING_FORMAT_T:
+		{
+			//T format
+			mipPaddedWidth = roundUp(mipWidth, utileW * 8);
+			mipPaddedHeight = roundUp(mipHeight, utileH * 8);
+			break;
+		}
+		case VC4_TILING_FORMAT_LT:
+		{
+			//LT format
+			mipPaddedWidth = roundUp(mipWidth, utileW);
+			mipPaddedHeight = roundUp(mipHeight, utileH);
+			break;
+		}
+		case VC4_TILING_FORMAT_LINEAR:
+		{
+			//linear format
+			mipPaddedWidth = roundUp(mipWidth, utileW);
+			mipPaddedHeight = mipHeight;
+			break;
+		}
+		}
 
-		//TODO
-		//i->levelOffsets[c] = ??
+//		mipPaddedWidth = getPow2Pad(mipPaddedWidth);
+//		mipPaddedHeight = getPow2Pad(mipPaddedHeight);
+
+		uint32_t mipPaddedSize = (mipPaddedWidth * mipPaddedHeight * bpp) >> 3;
+
+		i->levelOffsets[c] = prevMipPaddedSize;
+//		fprintf(stderr, "mipPaddedWidth: %u\n", mipPaddedWidth);
+//		fprintf(stderr, "mipPaddedHeight: %u\n", mipPaddedHeight);
+//		fprintf(stderr, "i->levelOffsets[%u]: %u\n", c, i->levelOffsets[c]);
+		prevMipPaddedSize += mipPaddedSize;
 
 		mipSize += mipPaddedWidth * mipPaddedHeight;
 	}
 
-	i->levelOffsets[0] = (mipSize * bpp) >> 3;
+	//must be a multiple of 4096 bytes
+	i->levelOffsets[0] = getBOAlignedSize((mipSize * bpp) >> 3, 4096);
 
-	//TODO does this need to be aligned?
-	i->size = getBOAlignedSize(((i->paddedWidth * i->paddedHeight + mipSize) * bpp) >> 3, ARM_PAGE_SIZE);
-	i->stride = (i->paddedWidth * bpp) >> 3;
+	i->size = getBOAlignedSize(((i->paddedWidth * i->paddedHeight * bpp) >> 3) + i->levelOffsets[0], ARM_PAGE_SIZE);
 
+//	fprintf(stderr, "i->tiling %u\n", i->tiling);
 //	fprintf(stderr, "i->levelOffsets[0] %u\n", i->levelOffsets[0]);
 //	fprintf(stderr, "i->size %u\n", i->size);
+//	fprintf(stderr, "i->paddedWidth %u\n", i->paddedWidth);
+//	fprintf(stderr, "i->paddedHeight %u\n", i->paddedHeight);
 //	fprintf(stderr, "mipSize %u\n", mipSize);
 //	fprintf(stderr, "bpp %u\n", bpp);
 
@@ -362,6 +429,16 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkBindImageMemory(
 	i->boundMem = m;
 	i->boundOffset = memoryOffset;
 
+	//TODO not sure if this is necessary
+	if(i->tiling == VC4_TILING_FORMAT_LINEAR)
+	{
+		int ret = vc4_bo_set_tiling(controlFd, i->boundMem->bo, DRM_FORMAT_MOD_LINEAR); assert(ret);
+	}
+	else if(i->tiling == VC4_TILING_FORMAT_T)
+	{
+		int ret = vc4_bo_set_tiling(controlFd, i->boundMem->bo, DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED); assert(ret);
+	}
+
 	return VK_SUCCESS;
 }
 
diff --git a/driver/wsi.c b/driver/wsi.c
index 736e1d1..43d0f50 100644
--- a/driver/wsi.c
+++ b/driver/wsi.c
@@ -202,39 +202,37 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateSwapchainKHR(
 
 	for(int c = 0; c < pCreateInfo->minImageCount; ++c)
 	{
-		s->images[c].boundMem = 0;
-		s->images[c].boundOffset = 0;
-		s->images[c].width = pCreateInfo->imageExtent.width;
-		s->images[c].height = pCreateInfo->imageExtent.height;
-		s->images[c].depth = 1;
-		s->images[c].layers = pCreateInfo->imageArrayLayers;
-		s->images[c].miplevels = 1;
-		s->images[c].samples = 1; //TODO
-		s->images[c].usageBits = pCreateInfo->imageUsage;
-		s->images[c].format = pCreateInfo->imageFormat;
+		VkImageCreateInfo imageCreateInfo = {0};
+		imageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
+		imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
+		imageCreateInfo.format = pCreateInfo->imageFormat;
+		imageCreateInfo.mipLevels = 1;
+		imageCreateInfo.arrayLayers = pCreateInfo->imageArrayLayers;
+		imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+		imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+		imageCreateInfo.usage = pCreateInfo->imageUsage;
+		imageCreateInfo.sharingMode = pCreateInfo->imageSharingMode;
+		imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+		imageCreateInfo.queueFamilyIndexCount = pCreateInfo->queueFamilyIndexCount;
+		imageCreateInfo.pQueueFamilyIndices = pCreateInfo->pQueueFamilyIndices;
+		imageCreateInfo.extent.width = pCreateInfo->imageExtent.width;
+		imageCreateInfo.extent.height = pCreateInfo->imageExtent.height;
+		imageCreateInfo.extent.depth = 1;
+
+		VkImage img;
+		VkResult imgRes = rpi_vkCreateImage(device, &imageCreateInfo, pAllocator, &img); assert(imgRes == VK_SUCCESS);
+
+		s->images[c] = *(_image*)img; //TODO(review): copied by value; the object behind img is not released in this hunk - confirm ownership
+
+
 		s->images[c].imageSpace = pCreateInfo->imageColorSpace;
-		s->images[c].concurrentAccess = pCreateInfo->imageSharingMode;
-		s->images[c].numQueueFamiliesWithAccess = pCreateInfo->queueFamilyIndexCount;
-		if(s->images[c].concurrentAccess)
-		{
-			s->images[c].queueFamiliesWithAccess = ALLOCATE(sizeof(uint32_t)*s->images[c].numQueueFamiliesWithAccess, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-			if(!s->images[c].queueFamiliesWithAccess)
-			{
-				return VK_ERROR_OUT_OF_HOST_MEMORY;
-			}
-			memcpy(s->images[c].queueFamiliesWithAccess, pCreateInfo->pQueueFamilyIndices, sizeof(uint32_t)*s->images[c].numQueueFamiliesWithAccess);
-		}
 		s->images[c].preTransformMode = pCreateInfo->preTransform;
 		s->images[c].compositeAlpha = pCreateInfo->compositeAlpha;
 		s->images[c].presentMode = pCreateInfo->presentMode;
 		s->images[c].clipped = pCreateInfo->clipped;
 
-
 		VkMemoryRequirements mr;
 		rpi_vkGetImageMemoryRequirements(device, &s->images[c], &mr);
-		//TODO is this the right place to do this?
-		s->images[c].tiling = VC4_TILING_FORMAT_T;
 		s->images[c].alignment = mr.alignment;
 
 		VkMemoryAllocateInfo ai;
@@ -250,20 +248,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateSwapchainKHR(
 		}
 
 		VkDeviceMemory mem;
-		rpi_vkAllocateMemory(device, &ai, 0, &mem);
+		rpi_vkAllocateMemory(device, &ai, pAllocator, &mem);
 
 		rpi_vkBindImageMemory(device, &s->images[c], mem, 0);
 
-		//set tiling to T if size > 4KB
-		if(s->images[c].tiling == VC4_TILING_FORMAT_T)
-		{
-			int ret = vc4_bo_set_tiling(controlFd, s->images[c].boundMem->bo, DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED); assert(ret);
-		}
-		else
-		{
-			int ret = vc4_bo_set_tiling(controlFd, s->images[c].boundMem->bo, DRM_FORMAT_MOD_LINEAR); assert(ret);
-		}
-
 		int res = modeset_create_fb(controlFd, &s->images[c]); assert(res == 0);
 	}
 