diff --git a/driver/ControlListUtil.h b/driver/ControlListUtil.h index 8675429..0b57f25 100644 --- a/driver/ControlListUtil.h +++ b/driver/ControlListUtil.h @@ -36,6 +36,7 @@ typedef struct CLMarker void* perfmonID; uint32_t clearColor[2]; uint32_t clearDepth, clearStencil; + uint32_t width, height; //render w/h //pointers that point to where all the other CL data is //plus sizes diff --git a/driver/command.c b/driver/command.c index e48944d..798705e 100644 --- a/driver/command.c +++ b/driver/command.c @@ -434,28 +434,17 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( uint32_t widthInTiles = 0, heightInTiles = 0; uint32_t width = 0, height = 0, bpp = 0; + width = marker->width; + height = marker->height; + if(writeImage) { - width = writeImage->width; - height = writeImage->height; bpp = getFormatBpp(writeImage->format); } else if(writeMSAAimage) { - width = writeMSAAimage->width; - height = writeMSAAimage->height; bpp = getFormatBpp(writeMSAAimage->format); } - else if(writeDepthStencilImage) - { - width = writeDepthStencilImage->width; - height = writeDepthStencilImage->height; - } - else if(writeMSAAdepthStencilImage) - { - width = writeMSAAdepthStencilImage->width; - height = writeMSAAdepthStencilImage->height; - } if(bpp == 64) { @@ -509,7 +498,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( submitCl.shader_rec_count = marker->shaderRecCount; submitCl.uniforms_size = marker->uniformsSize; - /** + /**/ printf("BCL:\n"); clDump(((uint8_t*)marker) + sizeof(CLMarker), marker->size); printf("BO handles: "); diff --git a/driver/common.c b/driver/common.c index f0ab67e..798cca2 100644 --- a/driver/common.c +++ b/driver/common.c @@ -678,7 +678,7 @@ void encodeTextureUniform(uint32_t* params, //array of 4 uint32_t params[2] = 0 | (noAutoLod & 0x1) | (uint32_t)(cubemapStride & 0x3ffff) << 12 - | (uint32_t)(isCubeMap ? 1 : 0) << 30; + | (uint32_t)(isCubeMap || noAutoLod ? 
1 : 0) << 30; //TODO //child images diff --git a/driver/common.h b/driver/common.h index 85f624f..698fdd5 100644 --- a/driver/common.h +++ b/driver/common.h @@ -422,6 +422,7 @@ typedef struct VkSampler_T VkSamplerMipmapMode mipmapMode; VkSamplerAddressMode addressModeU, addressModeV, addressModeW; float mipLodBias; + uint32_t disableAutoLod; VkBool32 anisotropyEnable; float maxAnisotropy; VkBool32 compareEnable; diff --git a/driver/copy.c b/driver/copy.c index acbae2e..d2e90bb 100644 --- a/driver/copy.c +++ b/driver/copy.c @@ -136,13 +136,17 @@ void createSampler(VkDevice device, VkSampler* nearestTextureSampler, VkSampler* sampler.mipLodBias = 0.0f; sampler.compareOp = VK_COMPARE_OP_NEVER; sampler.minLod = 0.0f; - sampler.maxLod = 999.0f; + sampler.maxLod = 0.0f; sampler.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; rpi_vkCreateSampler(device, &sampler, 0, nearestTextureSampler); + _sampler* s = *nearestTextureSampler; //dereference the handle that rpi_vkCreateSampler just wrote + s->disableAutoLod = 1; sampler.magFilter = VK_FILTER_LINEAR; sampler.minFilter = VK_FILTER_LINEAR; rpi_vkCreateSampler(device, &sampler, 0, linearTextureSampler); + s = *linearTextureSampler; //same for the linear sampler: the out-param points at the handle, not the object + s->disableAutoLod = 1; } void createRendertarget(VkDevice device, uint32_t baseMip, uint32_t width, uint32_t height, VkImage textureImage, VkImageView* textureView, VkRenderPass* offscreenRenderPass, VkFramebuffer* offscreenFramebuffer) @@ -150,6 +154,11 @@ void createRendertarget(VkDevice device, uint32_t baseMip, uint32_t width, uint3 _image* img = textureImage; VkFormat format = img->format; + printf("\nCopy Create RT\n"); + printf("baseMip %u\n", baseMip); + printf("width %u\n", width); + printf("height %u\n", height); + //we can't render to an ETC1 texture, so we'll just stick with RGBA8 for now if(img->format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK) { @@ -723,9 +732,10 @@ void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitSha ///r0 = varyingY * W "sig_none ; r2 = fadd.always(r0, r5, pay_zw, vary) ; r0 = fmul.always(a, b) 
;" ///r3 = r0 + r5 (C) - "sig_none ; r3 = fadd.pm.always(r0, r5) ; nop = nop(r0, r0) ;" + "sig_none ; r3 = fadd.pm.always(r0, r5, nop, uni) ; r0 = v8min.always(b, b) ;" ///write texture addresses (x, y) ///writing tmu0_s signals that all coordinates are written + "sig_none ; tmu0_b = or.always(r0, r0) ; nop = nop(r0, r0) ;" "sig_none ; tmu0_t = or.always(r3, r3) ; nop = nop(r0, r0) ;" "sig_none ; tmu0_s = or.always(r2, r2) ; nop = nop(r0, r0) ;" ///suspend thread (after 2 nops) to wait for TMU request to finish @@ -740,6 +750,7 @@ void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitSha "sig_none ; r2 = fmax.pm.always.8c(r4, r4) ; r0.8b = v8min.always(r1, r1) ;" "sig_none ; r3 = fmax.pm.always.8d(r4, r4) ; r0.8c = v8min.always(r2, r2) ;" "sig_none ; nop = nop.pm(r0, r0) ; r0.8d = v8min.always(r3, r3) ;" + ///"sig_small_imm; r0 = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;" "sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;" "sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" "sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;" @@ -792,6 +803,15 @@ void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitSha //fragment shader uniforms + { + VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT, + VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type + 0, //descriptor set # + 0, //descriptor binding # + 0, //descriptor array element # + 0, //resource offset + VK_SHADER_STAGE_FRAGMENT_BIT + }, { VK_RPI_ASSEMBLY_MAPPING_TYPE_DESCRIPTOR, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, //descriptor type @@ -800,7 +820,8 @@ void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitSha 0, //descriptor array element # 0, //resource offset VK_SHADER_STAGE_FRAGMENT_BIT - } + }, + }; VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {}; @@ -998,6 +1019,24 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage( VkPipeline blitPipeline; VkPipelineLayout blitPipelineLayout; + VkSampler mipSampler; + 
VkSamplerCreateInfo samplerCI = {}; + samplerCI.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + samplerCI.magFilter = filter == VK_FILTER_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + samplerCI.minFilter = filter == VK_FILTER_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + samplerCI.mipLodBias = srcMipLevel; + samplerCI.compareOp = VK_COMPARE_OP_NEVER; + samplerCI.minLod = 0.0f; + samplerCI.maxLod = 0.0f; + samplerCI.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + rpi_vkCreateSampler(device, &samplerCI, 0, &mipSampler); + _sampler* s = mipSampler; + s->disableAutoLod = 1; + VkImageViewCreateInfo view = {}; view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; view.viewType = VK_IMAGE_VIEW_TYPE_2D; @@ -1010,7 +1049,6 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage( view.image = srcImage; rpi_vkCreateImageView(device, &view, 0, &srcTextureView); - //TODO this crashes somehow //create blit descriptor set VkDescriptorSetAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; @@ -1022,7 +1060,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage( VkDescriptorImageInfo imageInfo; imageInfo.imageView = srcTextureView; imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - imageInfo.sampler = filter == VK_FILTER_LINEAR ? 
device->emulLinearTextureSampler : device->emulNearestTextureSampler; + imageInfo.sampler = mipSampler; VkWriteDescriptorSet writeDescriptorSet = {}; writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -1034,7 +1072,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage( rpi_vkUpdateDescriptorSets(device, 1, &writeDescriptorSet, 0, 0); createRendertarget(device, dstMipLevel, dstWidth, dstHeight, dstImage, &dstTextureView, &offscreenRenderPass, &offscreenFramebuffer); - createPipeline(device, 1, 4, 1, device->emulTextureToTextureShaderModule, device->emulTextureDsl, &blitPipelineLayout, offscreenRenderPass, &blitPipeline); + createPipeline(device, 1, 4, 2, device->emulTextureToTextureShaderModule, device->emulTextureDsl, &blitPipelineLayout, offscreenRenderPass, &blitPipeline); //offscreen rendering VkClearValue offscreenClearValues = @@ -1085,6 +1123,11 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage( rpi_vkCmdPushConstants(commandBuffer, blitPipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(vertConstants), &vertConstants); + uint32_t fragConstants[1]; + fragConstants[0] = *(uint32_t*)&samplerCI.mipLodBias; //fragment push constant: raw bits of the sampler's LOD bias + + rpi_vkCmdPushConstants(commandBuffer, blitPipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(fragConstants), &fragConstants); + rpi_vkCmdDraw(commandBuffer, 6, 1, 0, 0); rpi_vkCmdEndRenderPass(commandBuffer); diff --git a/driver/draw.c b/driver/draw.c index f7ea2c5..64609df 100644 --- a/driver/draw.c +++ b/driver/draw.c @@ -332,7 +332,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer) //TODO handle miplevels according to subresource rage? 
uint32_t params[4]; encodeTextureUniform(params, - di->imageView->image->miplevels - 1, + di->imageView->subresourceRange.levelCount - 1, getTextureDataType(di->imageView->interpretedFormat), di->imageView->viewType == VK_IMAGE_VIEW_TYPE_CUBE, 0, //TODO cubemap stride @@ -343,7 +343,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer) di->sampler->magFilter == VK_FILTER_NEAREST, getWrapMode(di->sampler->addressModeU), getWrapMode(di->sampler->addressModeV), - 0 //TODO no auto LOD + di->sampler->disableAutoLod ); uint32_t size = 0; @@ -367,7 +367,7 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer) //TODO handle this properly //TMU0_B requires an extra uniform written //we need to signal that somehow from API side - if(di->sampler->mipLodBias > 0.0f) + if(di->sampler->mipLodBias > 0.0f || di->sampler->disableAutoLod) { size += 4; } diff --git a/driver/pipeline.c b/driver/pipeline.c index dba8c6c..6b970fc 100644 --- a/driver/pipeline.c +++ b/driver/pipeline.c @@ -214,13 +214,13 @@ VkResult rpi_vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipeline memcpy(pip->names[idx], pCreateInfos[c].pStages[d].pName, strlen(pCreateInfos[c].pStages[d].pName)+1); //patch fragment shader - if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_FRAGMENT_BIT) - { - //TODO we could patch the fragment shader, but it would have a lot of edge cases - //since the user is writing assembly we can just let them have full control - //patchShaderDepthStencilBlending(&s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT], pCreateInfos[c].pDepthStencilState, pCreateInfos[c].pColorBlendState->pAttachments, pAllocator); +// if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_FRAGMENT_BIT) +// { +// //TODO we could patch the fragment shader, but it would have a lot of edge cases +// //since the user is writing assembly we can just let them have full control +// 
//patchShaderDepthStencilBlending(&s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT], pCreateInfos[c].pDepthStencilState, pCreateInfos[c].pColorBlendState->pAttachments, pAllocator); - //TODO if debug... +// //TODO if debug... // for(uint64_t e = 0; e < s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT] / 8; ++e) // { // printf("%#llx ", s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT][e]); @@ -228,12 +228,12 @@ VkResult rpi_vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipeline // } // printf("\n"); - s->bos[RPI_ASSEMBLY_TYPE_FRAGMENT] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT]); - } +// s->bos[RPI_ASSEMBLY_TYPE_FRAGMENT] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT]); +// } - if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_VERTEX_BIT) - { - //TODO if debug... +// if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_VERTEX_BIT) +// { +// //TODO if debug... 
// for(uint64_t e = 0; e < s->sizes[RPI_ASSEMBLY_TYPE_VERTEX] / 8; ++e) // { // printf("%#llx ", s->instructions[RPI_ASSEMBLY_TYPE_VERTEX][e]); @@ -248,9 +248,9 @@ VkResult rpi_vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipeline // } // printf("\n"); - s->bos[RPI_ASSEMBLY_TYPE_COORDINATE] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_COORDINATE], &s->sizes[RPI_ASSEMBLY_TYPE_COORDINATE]); - s->bos[RPI_ASSEMBLY_TYPE_VERTEX] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_VERTEX], &s->sizes[RPI_ASSEMBLY_TYPE_VERTEX]); - } +// s->bos[RPI_ASSEMBLY_TYPE_COORDINATE] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_COORDINATE], &s->sizes[RPI_ASSEMBLY_TYPE_COORDINATE]); +// s->bos[RPI_ASSEMBLY_TYPE_VERTEX] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_VERTEX], &s->sizes[RPI_ASSEMBLY_TYPE_VERTEX]); +// } } pip->vertexAttributeDescriptionCount = pCreateInfos[c].pVertexInputState->vertexAttributeDescriptionCount; diff --git a/driver/renderpass.c b/driver/renderpass.c index 525e490..b38e79d 100644 --- a/driver/renderpass.c +++ b/driver/renderpass.c @@ -215,30 +215,19 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB clGetHandleIndex(&commandBuffer->handlesCl, commandBuffer->binCl.currMarker->handlesBuf, commandBuffer->binCl.currMarker->handlesSize, writeMSAAdepthStencilImage->boundMem->bo); } - uint32_t width = 0, height = 0, bpp = 0; + uint32_t bpp = 0; + + cb->binCl.currMarker->width = fb->width; + cb->binCl.currMarker->height = fb->height; if(writeImage) { - width = writeImage->width; - height = writeImage->height; bpp = getFormatBpp(writeImage->format); } else if(writeMSAAimage) { - width = writeMSAAimage->width; - height = writeMSAAimage->height; bpp = getFormatBpp(writeMSAAimage->format); } - else if(writeDepthStencilImage) - { - width = writeDepthStencilImage->width; - height = writeDepthStencilImage->height; - } - else 
if(writeMSAAdepthStencilImage) - { - width = writeMSAAdepthStencilImage->width; - height = writeMSAAdepthStencilImage->height; - } clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length); clInsertTileBinningModeConfiguration(&commandBuffer->binCl, @@ -248,7 +237,7 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB 0, //auto initialize tile state data array bpp == 64, //64 bit color mode writeMSAAimage || writeMSAAdepthStencilImage || performResolve ? 1 : 0, //msaa - width, height, + cb->binCl.currMarker->width, cb->binCl.currMarker->height, 0, //tile state data array address 0, //tile allocation memory size 0); //tile allocation memory address diff --git a/driver/sampler.c b/driver/sampler.c index 6815405..3772742 100644 --- a/driver/sampler.c +++ b/driver/sampler.c @@ -32,6 +32,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateSampler( s->maxLod = pCreateInfo->maxLod; s->borderColor = pCreateInfo->borderColor; s->unnormalizedCoordinates = pCreateInfo->unnormalizedCoordinates; + s->disableAutoLod = 0; *pSampler = s; diff --git a/driver/vkExtFunctions.c b/driver/vkExtFunctions.c index 253858f..2683c8d 100644 --- a/driver/vkExtFunctions.c +++ b/driver/vkExtFunctions.c @@ -112,6 +112,15 @@ VkResult rpi_vkCreateShaderModuleFromRpiAssemblyEXT(VkPhysicalDevice physicalDev } shader->sizes[c] = size; + + + for(uint64_t e = 0; e < shader->sizes[c] / 8; ++e) + { + printf("%#llx ", shader->instructions[c][e]); + disassemble_qpu_asm(shader->instructions[c][e]); + } + printf("\n"); + shader->bos[c] = vc4_bo_alloc_shader(controlFd, shader->instructions[c], &shader->sizes[c]); } else { diff --git a/test/mipmapping/mipmapping.cpp b/test/mipmapping/mipmapping.cpp index 8896386..332e75f 100644 --- a/test/mipmapping/mipmapping.cpp +++ b/test/mipmapping/mipmapping.cpp @@ -1371,7 +1371,7 @@ void CreateTexture() VkBufferImageCopy bufferCopyRegion = {}; bufferCopyRegion.imageSubresource.aspectMask = 
VK_IMAGE_ASPECT_COLOR_BIT; - bufferCopyRegion.imageSubresource.mipLevel = 1; + bufferCopyRegion.imageSubresource.mipLevel = 0; bufferCopyRegion.imageSubresource.baseArrayLayer = 0; bufferCopyRegion.imageSubresource.layerCount = 1; bufferCopyRegion.imageExtent.width = width; @@ -1433,6 +1433,7 @@ void CreateTexture() vkAllocateCommandBuffers(device, &allocInfo, &mipgenCommandBuffer); + //for(uint32_t c = 1; c < 2; ++c) for(uint32_t c = 1; c < mipLevels; ++c) { VkImageBlit imageBlit = {}; @@ -1561,6 +1562,8 @@ void CreateTexture() sampler.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; vkCreateSampler(device, &sampler, 0, &textureSampler); } + + //exit(0); } void CreateDescriptorSet() diff --git a/test/texturing/texturing.cpp b/test/texturing/texturing.cpp index 5fcc97a..cef0acc 100644 --- a/test/texturing/texturing.cpp +++ b/test/texturing/texturing.cpp @@ -1337,7 +1337,7 @@ void CreateTexture() VkBufferImageCopy bufferCopyRegion = {}; bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - bufferCopyRegion.imageSubresource.mipLevel = 1; + bufferCopyRegion.imageSubresource.mipLevel = 0; bufferCopyRegion.imageSubresource.baseArrayLayer = 0; bufferCopyRegion.imageSubresource.layerCount = 1; bufferCopyRegion.imageExtent.width = width;