1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-03-21 12:29:15 +01:00

bunch of fixes trying to get mipmap generation working

This commit is contained in:
Unknown 2020-03-10 20:20:35 +00:00
parent e90f05657c
commit 5e72111ba8
12 changed files with 93 additions and 57 deletions

View File

@ -36,6 +36,7 @@ typedef struct CLMarker
void* perfmonID;
uint32_t clearColor[2];
uint32_t clearDepth, clearStencil;
uint32_t width, height; //render w/h
//pointers that point to where all the other CL data is
//plus sizes

View File

@ -434,28 +434,17 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit(
uint32_t widthInTiles = 0, heightInTiles = 0;
uint32_t width = 0, height = 0, bpp = 0;
width = marker->width;
height = marker->height;
if(writeImage)
{
width = writeImage->width;
height = writeImage->height;
bpp = getFormatBpp(writeImage->format);
}
else if(writeMSAAimage)
{
width = writeMSAAimage->width;
height = writeMSAAimage->height;
bpp = getFormatBpp(writeMSAAimage->format);
}
else if(writeDepthStencilImage)
{
width = writeDepthStencilImage->width;
height = writeDepthStencilImage->height;
}
else if(writeMSAAdepthStencilImage)
{
width = writeMSAAdepthStencilImage->width;
height = writeMSAAdepthStencilImage->height;
}
if(bpp == 64)
{
@ -509,7 +498,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit(
submitCl.shader_rec_count = marker->shaderRecCount;
submitCl.uniforms_size = marker->uniformsSize;
/**
/**/
printf("BCL:\n");
clDump(((uint8_t*)marker) + sizeof(CLMarker), marker->size);
printf("BO handles: ");

View File

@ -678,7 +678,7 @@ void encodeTextureUniform(uint32_t* params, //array of 4 uint32_t
params[2] = 0
| (noAutoLod & 0x1)
| (uint32_t)(cubemapStride & 0x3ffff) << 12
| (uint32_t)(isCubeMap ? 1 : 0) << 30;
| (uint32_t)(isCubeMap || noAutoLod ? 1 : 0) << 30;
//TODO
//child images

View File

@ -422,6 +422,7 @@ typedef struct VkSampler_T
VkSamplerMipmapMode mipmapMode;
VkSamplerAddressMode addressModeU, addressModeV, addressModeW;
float mipLodBias;
uint32_t disableAutoLod;
VkBool32 anisotropyEnable;
float maxAnisotropy;
VkBool32 compareEnable;

View File

@ -136,13 +136,17 @@ void createSampler(VkDevice device, VkSampler* nearestTextureSampler, VkSampler*
sampler.mipLodBias = 0.0f;
sampler.compareOp = VK_COMPARE_OP_NEVER;
sampler.minLod = 0.0f;
sampler.maxLod = 999.0f;
sampler.maxLod = 0.0f;
sampler.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
rpi_vkCreateSampler(device, &sampler, 0, nearestTextureSampler);
_sampler* s = nearestTextureSampler;
s->disableAutoLod = 1;
sampler.magFilter = VK_FILTER_LINEAR;
sampler.minFilter = VK_FILTER_LINEAR;
rpi_vkCreateSampler(device, &sampler, 0, linearTextureSampler);
s = linearTextureSampler;
s->disableAutoLod = 1;
}
void createRendertarget(VkDevice device, uint32_t baseMip, uint32_t width, uint32_t height, VkImage textureImage, VkImageView* textureView, VkRenderPass* offscreenRenderPass, VkFramebuffer* offscreenFramebuffer)
@ -150,6 +154,11 @@ void createRendertarget(VkDevice device, uint32_t baseMip, uint32_t width, uint3
_image* img = textureImage;
VkFormat format = img->format;
printf("\nCopy Create RT\n");
printf("baseMip %u\n", baseMip);
printf("width %u\n", width);
printf("height %u\n", height);
//we can't render to an ETC1 texture, so we'll just stick with RGBA8 for now
if(img->format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK)
{
@ -723,9 +732,10 @@ void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitSha
///r0 = varyingY * W
"sig_none ; r2 = fadd.always(r0, r5, pay_zw, vary) ; r0 = fmul.always(a, b) ;"
///r3 = r0 + r5 (C)
"sig_none ; r3 = fadd.pm.always(r0, r5) ; nop = nop(r0, r0) ;"
"sig_none ; r3 = fadd.pm.always(r0, r5, nop, uni) ; r0 = v8min.always(b, b) ;"
///write texture addresses (x, y)
///writing tmu0_s signals that all coordinates are written
"sig_none ; tmu0_b = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; tmu0_t = or.always(r3, r3) ; nop = nop(r0, r0) ;"
"sig_none ; tmu0_s = or.always(r2, r2) ; nop = nop(r0, r0) ;"
///suspend thread (after 2 nops) to wait for TMU request to finish
@ -740,6 +750,7 @@ void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitSha
"sig_none ; r2 = fmax.pm.always.8c(r4, r4) ; r0.8b = v8min.always(r1, r1) ;"
"sig_none ; r3 = fmax.pm.always.8d(r4, r4) ; r0.8c = v8min.always(r2, r2) ;"
"sig_none ; nop = nop.pm(r0, r0) ; r0.8d = v8min.always(r3, r3) ;"
///"sig_small_imm; r0 = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;"
"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
@ -792,6 +803,15 @@ void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitSha
//fragment shader uniforms
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
VK_SHADER_STAGE_FRAGMENT_BIT
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_DESCRIPTOR,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, //descriptor type
@ -800,7 +820,8 @@ void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitSha
0, //descriptor array element #
0, //resource offset
VK_SHADER_STAGE_FRAGMENT_BIT
}
},
};
VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {};
@ -998,6 +1019,24 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage(
VkPipeline blitPipeline;
VkPipelineLayout blitPipelineLayout;
VkSampler mipSampler;
VkSamplerCreateInfo samplerCI = {};
samplerCI.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
samplerCI.magFilter = filter == VK_FILTER_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
samplerCI.minFilter = filter == VK_FILTER_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
samplerCI.mipLodBias = srcMipLevel;
samplerCI.compareOp = VK_COMPARE_OP_NEVER;
samplerCI.minLod = 0.0f;
samplerCI.maxLod = 0.0f;
samplerCI.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
rpi_vkCreateSampler(device, &samplerCI, 0, &mipSampler);
_sampler* s = mipSampler;
s->disableAutoLod = 1;
VkImageViewCreateInfo view = {};
view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
view.viewType = VK_IMAGE_VIEW_TYPE_2D;
@ -1010,7 +1049,6 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage(
view.image = srcImage;
rpi_vkCreateImageView(device, &view, 0, &srcTextureView);
//TODO this crashes somehow
//create blit descriptor set
VkDescriptorSetAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
@ -1022,7 +1060,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage(
VkDescriptorImageInfo imageInfo;
imageInfo.imageView = srcTextureView;
imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
imageInfo.sampler = filter == VK_FILTER_LINEAR ? device->emulLinearTextureSampler : device->emulNearestTextureSampler;
imageInfo.sampler = mipSampler;
VkWriteDescriptorSet writeDescriptorSet = {};
writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@ -1034,7 +1072,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage(
rpi_vkUpdateDescriptorSets(device, 1, &writeDescriptorSet, 0, 0);
createRendertarget(device, dstMipLevel, dstWidth, dstHeight, dstImage, &dstTextureView, &offscreenRenderPass, &offscreenFramebuffer);
createPipeline(device, 1, 4, 1, device->emulTextureToTextureShaderModule, device->emulTextureDsl, &blitPipelineLayout, offscreenRenderPass, &blitPipeline);
createPipeline(device, 1, 4, 2, device->emulTextureToTextureShaderModule, device->emulTextureDsl, &blitPipelineLayout, offscreenRenderPass, &blitPipeline);
//offscreen rendering
VkClearValue offscreenClearValues =
@ -1085,6 +1123,11 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage(
rpi_vkCmdPushConstants(commandBuffer, blitPipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(vertConstants), &vertConstants);
uint32_t fragConstants[1];
vertConstants[0] = *(uint32_t*)&samplerCI.mipLodBias;
rpi_vkCmdPushConstants(commandBuffer, blitPipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(fragConstants), &fragConstants);
rpi_vkCmdDraw(commandBuffer, 6, 1, 0, 0);
rpi_vkCmdEndRenderPass(commandBuffer);

View File

@ -332,7 +332,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer)
//TODO handle miplevels according to subresource range?
uint32_t params[4];
encodeTextureUniform(params,
di->imageView->image->miplevels - 1,
di->imageView->subresourceRange.levelCount - 1,
getTextureDataType(di->imageView->interpretedFormat),
di->imageView->viewType == VK_IMAGE_VIEW_TYPE_CUBE,
0, //TODO cubemap stride
@ -343,7 +343,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer)
di->sampler->magFilter == VK_FILTER_NEAREST,
getWrapMode(di->sampler->addressModeU),
getWrapMode(di->sampler->addressModeV),
0 //TODO no auto LOD
di->sampler->disableAutoLod
);
uint32_t size = 0;
@ -367,7 +367,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer)
//TODO handle this properly
//TMU0_B requires an extra uniform written
//we need to signal that somehow from API side
if(di->sampler->mipLodBias > 0.0f)
if(di->sampler->mipLodBias > 0.0f || di->sampler->disableAutoLod)
{
size += 4;
}

View File

@ -214,13 +214,13 @@ VkResult rpi_vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipeline
memcpy(pip->names[idx], pCreateInfos[c].pStages[d].pName, strlen(pCreateInfos[c].pStages[d].pName)+1);
//patch fragment shader
if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_FRAGMENT_BIT)
{
//TODO we could patch the fragment shader, but it would have a lot of edge cases
//since the user is writing assembly we can just let them have full control
//patchShaderDepthStencilBlending(&s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT], pCreateInfos[c].pDepthStencilState, pCreateInfos[c].pColorBlendState->pAttachments, pAllocator);
// if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_FRAGMENT_BIT)
// {
// //TODO we could patch the fragment shader, but it would have a lot of edge cases
// //since the user is writing assembly we can just let them have full control
// //patchShaderDepthStencilBlending(&s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT], pCreateInfos[c].pDepthStencilState, pCreateInfos[c].pColorBlendState->pAttachments, pAllocator);
//TODO if debug...
// //TODO if debug...
// for(uint64_t e = 0; e < s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT] / 8; ++e)
// {
// printf("%#llx ", s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT][e]);
@ -228,12 +228,12 @@ VkResult rpi_vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipeline
// }
// printf("\n");
s->bos[RPI_ASSEMBLY_TYPE_FRAGMENT] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT]);
}
// s->bos[RPI_ASSEMBLY_TYPE_FRAGMENT] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT]);
// }
if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_VERTEX_BIT)
{
//TODO if debug...
// if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_VERTEX_BIT)
// {
// //TODO if debug...
// for(uint64_t e = 0; e < s->sizes[RPI_ASSEMBLY_TYPE_VERTEX] / 8; ++e)
// {
// printf("%#llx ", s->instructions[RPI_ASSEMBLY_TYPE_VERTEX][e]);
@ -248,9 +248,9 @@ VkResult rpi_vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipeline
// }
// printf("\n");
s->bos[RPI_ASSEMBLY_TYPE_COORDINATE] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_COORDINATE], &s->sizes[RPI_ASSEMBLY_TYPE_COORDINATE]);
s->bos[RPI_ASSEMBLY_TYPE_VERTEX] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_VERTEX], &s->sizes[RPI_ASSEMBLY_TYPE_VERTEX]);
}
// s->bos[RPI_ASSEMBLY_TYPE_COORDINATE] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_COORDINATE], &s->sizes[RPI_ASSEMBLY_TYPE_COORDINATE]);
// s->bos[RPI_ASSEMBLY_TYPE_VERTEX] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_VERTEX], &s->sizes[RPI_ASSEMBLY_TYPE_VERTEX]);
// }
}
pip->vertexAttributeDescriptionCount = pCreateInfos[c].pVertexInputState->vertexAttributeDescriptionCount;

View File

@ -215,30 +215,19 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB
clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo);
}
uint32_t width = 0, height = 0, bpp = 0;
uint32_t bpp = 0;
cb->binCl.currMarker->width = fb->width;
cb->binCl.currMarker->height = fb->height;
if(writeImage)
{
width = writeImage->width;
height = writeImage->height;
bpp = getFormatBpp(writeImage->format);
}
else if(writeMSAAimage)
{
width = writeMSAAimage->width;
height = writeMSAAimage->height;
bpp = getFormatBpp(writeMSAAimage->format);
}
else if(writeDepthStencilImage)
{
width = writeDepthStencilImage->width;
height = writeDepthStencilImage->height;
}
else if(writeMSAAdepthStencilImage)
{
width = writeMSAAdepthStencilImage->width;
height = writeMSAAdepthStencilImage->height;
}
clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length);
clInsertTileBinningModeConfiguration(&commandBuffer->binCl,
@ -248,7 +237,7 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB
0, //auto initialize tile state data array
bpp == 64, //64 bit color mode
writeMSAAimage || writeMSAAdepthStencilImage || performResolve ? 1 : 0, //msaa
width, height,
cb->binCl.currMarker->width, cb->binCl.currMarker->height,
0, //tile state data array address
0, //tile allocation memory size
0); //tile allocation memory address

View File

@ -32,6 +32,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateSampler(
s->maxLod = pCreateInfo->maxLod;
s->borderColor = pCreateInfo->borderColor;
s->unnormalizedCoordinates = pCreateInfo->unnormalizedCoordinates;
s->disableAutoLod = 0;
*pSampler = s;

View File

@ -112,6 +112,15 @@ VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT(VkPhysicalDevice physicalDev
}
shader->sizes[c] = size;
for(uint64_t e = 0; e < shader->sizes[c] / 8; ++e)
{
printf("%#llx ", shader->instructions[c][e]);
disassemble_qpu_asm(shader->instructions[c][e]);
}
printf("\n");
shader->bos[c] = vc4_bo_alloc_shader(controlFd, shader->instructions[c], &shader->sizes[c]);
}
else
{

View File

@ -1371,7 +1371,7 @@ void CreateTexture()
VkBufferImageCopy bufferCopyRegion = {};
bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
bufferCopyRegion.imageSubresource.mipLevel = 1;
bufferCopyRegion.imageSubresource.mipLevel = 0;
bufferCopyRegion.imageSubresource.baseArrayLayer = 0;
bufferCopyRegion.imageSubresource.layerCount = 1;
bufferCopyRegion.imageExtent.width = width;
@ -1433,6 +1433,7 @@ void CreateTexture()
vkAllocateCommandBuffers(device, &allocInfo, &mipgenCommandBuffer);
//for(uint32_t c = 1; c < 2; ++c)
for(uint32_t c = 1; c < mipLevels; ++c)
{
VkImageBlit imageBlit = {};
@ -1561,6 +1562,8 @@ void CreateTexture()
sampler.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
vkCreateSampler(device, &sampler, 0, &textureSampler);
}
//exit(0);
}
void CreateDescriptorSet()

View File

@ -1337,7 +1337,7 @@ void CreateTexture()
VkBufferImageCopy bufferCopyRegion = {};
bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
bufferCopyRegion.imageSubresource.mipLevel = 1;
bufferCopyRegion.imageSubresource.mipLevel = 0;
bufferCopyRegion.imageSubresource.baseArrayLayer = 0;
bufferCopyRegion.imageSubresource.layerCount = 1;
bufferCopyRegion.imageExtent.width = width;