1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-02-19 16:54:18 +01:00

rewrote getmemrequirements and the wsi mess

This commit is contained in:
Unknown 2020-03-01 23:28:27 +00:00
parent 9348346ef8
commit 7c01c7846f
4 changed files with 170 additions and 91 deletions

View File

@ -209,6 +209,7 @@ int findDeviceExtension(char* name)
//Textures in T format:
//formed out of 4KB tiles, which have 1KB subtiles (see page 105 in VC4 arch guide)
//1KB subtiles have 512-bit (64-byte) microtiles.
//Textures in LT format consist of 512-bit (64-byte) microtiles laid out linearly.
//Width/height (in pixels) of the 512-bit microtiles is the following:
// 64bpp: 2x4
// 32bpp: 4x4
@ -230,62 +231,60 @@ int findDeviceExtension(char* name)
// 8bpp: 64x64
// 4bpp: 128x64
// 1bpp: 256x128
void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp, uint32_t* paddedWidth, uint32_t* paddedHeight)
void getUTileDimensions(uint32_t bpp, uint32_t* tileW, uint32_t* tileH)
{
assert(paddedWidth);
assert(paddedHeight);
uint32_t tileW = 0;
uint32_t tileH = 0;
assert(tileW);
assert(tileH)
switch(bpp)
{
case 256:
{
tileW = 8;
tileH = 16;
*tileW = 1;
*tileH = 2;
break;
}
case 128:
{
tileW = 16;
tileH = 16;
*tileW = 2;
*tileH = 2;
break;
}
case 64:
{
tileW = 16;
tileH = 32;
*tileW = 2;
*tileH = 4;
break;
}
case 32:
case 24: //TODO
{
tileW = 32;
tileH = 32;
*tileW = 4;
*tileH = 4;
break;
}
case 16:
{
tileW = 64;
tileH = 32;
*tileW = 8;
*tileH = 4;
break;
}
case 8:
{
tileW = 64;
tileH = 64;
*tileW = 8;
*tileH = 8;
break;
}
case 4:
{
tileW = 128;
tileH = 64;
*tileW = 16;
*tileH = 8;
break;
}
case 1:
{
tileW = 256;
tileH = 128;
*tileW = 32;
*tileH = 16;
break;
}
default:
@ -294,9 +293,23 @@ void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp,
assert(!"Unsupported texture bpp.");
}
}
}
*paddedWidth = ((tileW - (width % tileW)) % tileW) + width;
*paddedHeight = ((tileH - (height % tileH)) % tileH) + height;
//Rounds numToRound up to the nearest multiple of `multiple`.
//Values that are already a multiple are returned unchanged.
//A multiple of 0 is treated as "no rounding" and returns numToRound as is.
uint32_t roundUp(uint32_t numToRound, uint32_t multiple)
{
	//a zero multiple would make the modulo below divide by zero
	if(multiple == 0)
	{
		return numToRound;
	}

	//how far numToRound sits past the previous multiple
	uint32_t excess = numToRound % multiple;

	//already aligned: nothing to add, otherwise step up to the next multiple
	return excess == 0 ? numToRound : numToRound + (multiple - excess);
}
/*static inline void util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc)

View File

@ -196,7 +196,7 @@ typedef struct VkImage_T
uint32_t usageBits;
uint32_t format;
uint32_t imageSpace;
uint32_t tiling; //T or LT
uint32_t tiling; //Linear or T or LT
uint32_t layout;
_deviceMemory* boundMem;
uint32_t boundOffset;
@ -512,7 +512,8 @@ uint32_t packVec4IntoABGR8(const float rgba[4]);
void createImageBO(_image* i);
int findInstanceExtension(char* name);
int findDeviceExtension(char* name);
void getPaddedTextureDimensionsT(uint32_t width, uint32_t height, uint32_t bpp, uint32_t* paddedWidth, uint32_t* paddedHeight);
void getUTileDimensions(uint32_t bpp, uint32_t* tileW, uint32_t* tileH);
uint32_t roundUp(uint32_t numToRound, uint32_t multiple);
int isDepthStencilFormat(VkFormat format);
uint32_t getCompareOp(VkCompareOp op);
uint32_t getStencilOp(VkStencilOp op);

View File

@ -212,7 +212,23 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateImage(
i->usageBits = pCreateInfo->usage;
i->format = pCreateInfo->format;
i->imageSpace = 0;
i->tiling = pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR ? VC4_TILING_FORMAT_LT : VC4_TILING_FORMAT_T;
uint32_t nonPaddedSize = (i->width * i->height * getFormatBpp(i->format)) >> 3;
i->tiling = 0;
if(pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
{
i->tiling = VC4_TILING_FORMAT_LINEAR;
}
else
{
if(nonPaddedSize > 4096)
{
i->tiling = VC4_TILING_FORMAT_T;
}
else
{
i->tiling = VC4_TILING_FORMAT_LT;
}
}
i->layout = pCreateInfo->initialLayout;
i->boundMem = 0;
i->boundOffset = 0;
@ -277,59 +293,110 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkGetImageMemoryRequirements(
_image* i = image;
uint32_t bpp = getFormatBpp(i->format);
uint32_t nonPaddedSize = (i->width * i->height * bpp) >> 3;
uint32_t utileW, utileH;
getUTileDimensions(bpp, &utileW, &utileH);
if(nonPaddedSize > 4096)
switch(i->tiling)
{
//need to pad to T format, as HW automatically chooses that
getPaddedTextureDimensionsT(i->width, i->height, bpp, &i->paddedWidth, &i->paddedHeight);
}
else
{
//LT format
i->paddedWidth = i->width;
i->paddedHeight = i->height;
case VC4_TILING_FORMAT_T:
{
//need to pad to T format, as HW automatically chooses that
i->paddedWidth = roundUp(i->width, utileW * 8);
i->paddedHeight = roundUp(i->height, utileH * 8);
break;
}
case VC4_TILING_FORMAT_LT:
{
//LT format
i->paddedWidth = roundUp(i->width, utileW);
i->paddedHeight = roundUp(i->height, utileH);
break;
}
case VC4_TILING_FORMAT_LINEAR:
{
//linear format
i->paddedWidth = roundUp(i->width, utileW);
i->paddedHeight = i->height;
break;
}
}
i->stride = (i->paddedWidth * bpp) >> 3;
uint32_t mipSize = 0;
//TODO make sure this works properly
for(uint32_t c = 1; c < i->miplevels; ++c)
//mip levels are laid out in memory the following way:
//0x0.................................................0xffffff
//smallest mip level ... largest mip level - 1, base mip level
//base mip level offset must be a multiple of 4KB
//mip levels other than the base must be aligned to power of two sizes
//mip levels must be padded to either T or LT format depending on size
uint32_t prevMipPaddedSize = 0;
for(uint32_t c = i->miplevels - 1; c >= 1; --c)
{
uint32_t mipWidth = max(i->width >> c, 1);
uint32_t mipHeight = max(i->height >> c, 1);
uint32_t mipNonPaddedSize = (mipWidth * mipHeight * bpp) >> 3;
uint32_t nonPaddedSize = (mipWidth * mipHeight * bpp) >> 3;
uint32_t mipPaddedWidth, mipPaddedHeight;
if(mipNonPaddedSize > 4096)
uint32_t tiling = i->tiling;
if(i->tiling == VC4_TILING_FORMAT_T && nonPaddedSize <= 4096)
{
//T format
getPaddedTextureDimensionsT(mipWidth, mipHeight, bpp, &mipPaddedWidth, &mipPaddedHeight);
}
else
{
//LT format
mipPaddedWidth = mipWidth;
mipPaddedHeight = mipHeight;
tiling = VC4_TILING_FORMAT_LT;
}
mipPaddedWidth= getPow2Pad(mipPaddedWidth);
mipPaddedHeight = getPow2Pad(mipPaddedHeight);
switch(tiling)
{
case VC4_TILING_FORMAT_T:
{
//T format
mipPaddedWidth = roundUp(mipWidth, utileW * 8);
mipPaddedHeight = roundUp(mipHeight, utileH * 8);
break;
}
case VC4_TILING_FORMAT_LT:
{
//LT format
mipPaddedWidth = roundUp(mipWidth, utileW);
mipPaddedHeight = roundUp(mipHeight, utileH);
break;
}
case VC4_TILING_FORMAT_LINEAR:
{
//linear format
mipPaddedWidth = roundUp(mipWidth, utileW);
mipPaddedHeight = mipHeight;
break;
}
}
//TODO
//i->levelOffsets[c] = ??
// mipPaddedWidth = getPow2Pad(mipPaddedWidth);
// mipPaddedHeight = getPow2Pad(mipPaddedHeight);
uint32_t mipPaddedSize = (mipPaddedWidth * mipPaddedHeight * bpp) >> 3;
i->levelOffsets[c] = prevMipPaddedSize;
// fprintf(stderr, "mipPaddedWidth: %u\n", mipPaddedWidth);
// fprintf(stderr, "mipPaddedHeight: %u\n", mipPaddedHeight);
// fprintf(stderr, "i->levelOffsets[%u]: %u\n", c, i->levelOffsets[c]);
prevMipPaddedSize += mipPaddedSize;
mipSize += mipPaddedWidth * mipPaddedHeight;
}
i->levelOffsets[0] = (mipSize * bpp) >> 3;
//must be a multiple of 4096 bytes
i->levelOffsets[0] = getBOAlignedSize((mipSize * bpp) >> 3, 4096);
//TODO does this need to be aligned?
i->size = getBOAlignedSize(((i->paddedWidth * i->paddedHeight + mipSize) * bpp) >> 3, ARM_PAGE_SIZE);
i->stride = (i->paddedWidth * bpp) >> 3;
i->size = getBOAlignedSize(((i->paddedWidth * i->paddedHeight * bpp) >> 3) + i->levelOffsets[0], ARM_PAGE_SIZE);
// fprintf(stderr, "i->tiling %u\n", i->tiling);
// fprintf(stderr, "i->levelOffsets[0] %u\n", i->levelOffsets[0]);
// fprintf(stderr, "i->size %u\n", i->size);
// fprintf(stderr, "i->paddedWidth %u\n", i->paddedWidth);
// fprintf(stderr, "i->paddedHeight %u\n", i->paddedHeight);
// fprintf(stderr, "mipSize %u\n", mipSize);
// fprintf(stderr, "bpp %u\n", bpp);
@ -362,6 +429,16 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkBindImageMemory(
i->boundMem = m;
i->boundOffset = memoryOffset;
//TODO not sure if this is necessary
if(i->tiling == VC4_TILING_FORMAT_LINEAR)
{
int ret = vc4_bo_set_tiling(controlFd, i->boundMem->bo, DRM_FORMAT_MOD_LINEAR); assert(ret);
}
else if(i->tiling == VC4_TILING_FORMAT_T)
{
int ret = vc4_bo_set_tiling(controlFd, i->boundMem->bo, DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED); assert(ret);
}
return VK_SUCCESS;
}

View File

@ -202,39 +202,37 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateSwapchainKHR(
for(int c = 0; c < pCreateInfo->minImageCount; ++c)
{
s->images[c].boundMem = 0;
s->images[c].boundOffset = 0;
s->images[c].width = pCreateInfo->imageExtent.width;
s->images[c].height = pCreateInfo->imageExtent.height;
s->images[c].depth = 1;
s->images[c].layers = pCreateInfo->imageArrayLayers;
s->images[c].miplevels = 1;
s->images[c].samples = 1; //TODO
s->images[c].usageBits = pCreateInfo->imageUsage;
s->images[c].format = pCreateInfo->imageFormat;
VkImageCreateInfo imageCreateInfo = {};
imageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
imageCreateInfo.format = pCreateInfo->imageFormat;
imageCreateInfo.mipLevels = 1;
imageCreateInfo.arrayLayers = pCreateInfo->imageArrayLayers;
imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imageCreateInfo.usage = pCreateInfo->imageUsage;
imageCreateInfo.sharingMode = pCreateInfo->imageSharingMode;
imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageCreateInfo.queueFamilyIndexCount = pCreateInfo->queueFamilyIndexCount;
imageCreateInfo.pQueueFamilyIndices = pCreateInfo->pQueueFamilyIndices;
imageCreateInfo.extent.width = pCreateInfo->imageExtent.width;
imageCreateInfo.extent.height = pCreateInfo->imageExtent.height;
imageCreateInfo.extent.depth = 1;
VkImage img;
rpi_vkCreateImage(device, &imageCreateInfo, pAllocator, &img);
s->images[c] = *(_image*)img;
s->images[c].imageSpace = pCreateInfo->imageColorSpace;
s->images[c].concurrentAccess = pCreateInfo->imageSharingMode;
s->images[c].numQueueFamiliesWithAccess = pCreateInfo->queueFamilyIndexCount;
if(s->images[c].concurrentAccess)
{
s->images[c].queueFamiliesWithAccess = ALLOCATE(sizeof(uint32_t)*s->images[c].numQueueFamiliesWithAccess, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!s->images[c].queueFamiliesWithAccess)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(s->images[c].queueFamiliesWithAccess, pCreateInfo->pQueueFamilyIndices, sizeof(uint32_t)*s->images[c].numQueueFamiliesWithAccess);
}
s->images[c].preTransformMode = pCreateInfo->preTransform;
s->images[c].compositeAlpha = pCreateInfo->compositeAlpha;
s->images[c].presentMode = pCreateInfo->presentMode;
s->images[c].clipped = pCreateInfo->clipped;
VkMemoryRequirements mr;
rpi_vkGetImageMemoryRequirements(device, &s->images[c], &mr);
//TODO is this the right place to do this?
s->images[c].tiling = VC4_TILING_FORMAT_T;
s->images[c].alignment = mr.alignment;
VkMemoryAllocateInfo ai;
@ -250,20 +248,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateSwapchainKHR(
}
VkDeviceMemory mem;
rpi_vkAllocateMemory(device, &ai, 0, &mem);
rpi_vkAllocateMemory(device, &ai, pAllocator, &mem);
rpi_vkBindImageMemory(device, &s->images[c], mem, 0);
//set tiling to T if size > 4KB
if(s->images[c].tiling == VC4_TILING_FORMAT_T)
{
int ret = vc4_bo_set_tiling(controlFd, s->images[c].boundMem->bo, DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED); assert(ret);
}
else
{
int ret = vc4_bo_set_tiling(controlFd, s->images[c].boundMem->bo, DRM_FORMAT_MOD_LINEAR); assert(ret);
}
int res = modeset_create_fb(controlFd, &s->images[c]); assert(res == 0);
}