From 03f98aff824d9ad23c9b5dd925f08671d951de9b Mon Sep 17 00:00:00 2001 From: yours3lf <0.tamas.marton@gmail.com> Date: Fri, 1 May 2020 19:38:13 +0100 Subject: [PATCH] trying to get attrib setup finally right --- driver/command.c | 5 ++-- driver/common.h | 2 ++ driver/copy.c | 10 +++---- driver/draw.c | 41 ++++++++++++++++++-------- driver/instance.c | 50 ++++++++++++++++---------------- driver/kernelInterface.c | 5 ++-- driver/shader.c | 62 ++++++++++++++++++++++++++++------------ 7 files changed, 111 insertions(+), 64 deletions(-) diff --git a/driver/command.c b/driver/command.c index 563e76a..f69ab94 100644 --- a/driver/command.c +++ b/driver/command.c @@ -551,10 +551,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( printf("\nUniforms: "); for(int d = 0; d < marker->uniformsSize / 4; ++d) { - printf("%u ", *((uint32_t*)(marker->uniformsBuf)+d)); + printf("%i ", *((uint32_t*)(marker->uniformsBuf)+d)); } printf("\nShader recs: "); - uint8_t* ptr = marker->shaderRecBuf + (3 + 1) * 4; + uint8_t* ptr = marker->shaderRecBuf + (3 + 2) * 4; for(int d = 0; d < marker->shaderRecCount; ++d) { uint8_t flags = *ptr; @@ -607,6 +607,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit( numAttribs += (vertexAttribSelectBits & (1 << e)) >> e; } + printf("\nnumattribs: %i", numAttribs); for(uint8_t e = 0; e < numAttribs; ++e) { uint32_t attribBaseAddress = *(uint32_t*)ptr; ptr+=4; diff --git a/driver/common.h b/driver/common.h index 84eb2d7..1183bda 100644 --- a/driver/common.h +++ b/driver/common.h @@ -252,6 +252,8 @@ typedef struct VkShaderModule_T uint32_t numMappings[VK_RPI_ASSEMBLY_TYPE_MAX]; uint32_t hasThreadSwitch; uint32_t numVaryings; + uint32_t numVertVPMWrites; + uint32_t numCoordVPMWrites; } _shaderModule; typedef struct VkDescriptorSetLayout_T diff --git a/driver/copy.c b/driver/copy.c index 88051a8..4e18f48 100644 --- a/driver/copy.c +++ b/driver/copy.c @@ -148,11 +148,11 @@ void createRendertarget(VkDevice device, uint32_t baseLayer, uint32_t baseMip, u _image* img = textureImage; VkFormat format = img->format; - printf("\nCopy Create RT\n"); - printf("baseLayer %u\n", baseLayer); - printf("baseMip %u\n", baseMip); - printf("width %u\n", width); - printf("height %u\n", height); +// printf("\nCopy Create RT\n"); +// printf("baseLayer %u\n", baseLayer); +// printf("baseMip %u\n", baseMip); +// printf("width %u\n", width); +// printf("height %u\n", height); //we can't render to an ETC1 texture, so we'll just stick with RGBA8 for now if(img->format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK) diff --git a/driver/draw.c b/driver/draw.c index 9c4b1a5..b8f7507 100644 --- a/driver/draw.c +++ b/driver/draw.c @@ -184,11 +184,15 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) } } - uint32_t attribSize = 0; - for(uint32_t c = 0; c < cb->graphicsPipeline->vertexAttributeDescriptionCount; ++c) - { - attribSize += getFormatBpp(cb->graphicsPipeline->vertexAttributeDescriptions[c].format) >> 3; - } + //TODO + //attrib size is simply how many times we wrote VPM x 4bytes minus the usual stuff (so 3x4bytes for Xs/Ys etc.) + //for CS it's always 12 + + //for attrib offsets + //vertex coords will obviouslly have offset 0 + //coord offsets will always be 12 unless it's vertex coords then it's 0 + //the rest: + //for VS we need to add the size of Xs/Ys, Zs, and 1/Wc (+point size if ever), so 3x4bytes //number of attribs //3 is the number of type of possible shaders @@ -210,24 +214,28 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) fragCode, //fragment code address 0, //TODO vertex number of used uniforms? attribSelectBits, //vertex attribute array select bits - attribSize, //vertex total attribute size + vertModule->numVertVPMWrites * 4 - 12, //vertex total attribute size 0, //vertex uniform address vertCode, //vertex shader code address 0, //TODO coordinate number of used uniforms? //TODO how do we know which attribute contains the vertices? //for now the first one will be hardcoded to have the vertices... 1 << 0, //coordinate attribute array select bits - getFormatBpp(cb->graphicsPipeline->vertexAttributeDescriptions[0].format) >> 3, //coordinate total attribute size + 12, //coordinate total attribute size 0, //coordinate uniform address coordCode //coordinate shader code address ); uint32_t vertexAttribOffsets[8] = {}; - for(uint32_t c = 0 ; c < 8; ++c) + uint32_t coordAttribOffsets[8] = {}; + vertexAttribOffsets[1] = 12; + coordAttribOffsets[1] = 12; + for(uint32_t c = 2 ; c < 8; ++c) { - for(uint32_t d = 0 ; d < cb->graphicsPipeline->vertexAttributeDescriptionCount; ++d) + coordAttribOffsets[c] = 12; + for(uint32_t d = 0; d < cb->graphicsPipeline->vertexAttributeDescriptionCount; ++d) { - if(cb->graphicsPipeline->vertexAttributeDescriptions[d].binding < c) + if(cb->graphicsPipeline->vertexAttributeDescriptions[d].location < c && cb->graphicsPipeline->vertexAttributeDescriptions[d].location > 0) { vertexAttribOffsets[c] += cb->graphicsPipeline->vertexBindingDescriptions[cb->graphicsPipeline->vertexAttributeDescriptions[d].binding].stride; } @@ -252,6 +260,15 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) - cb->vertexBuffers[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding]->boundOffset - formatByteSize) / stride; +// fprintf(stderr, "usedIndices %i\n", usedIndices); +// fprintf(stderr, "boundMemsize %i\n", cb->vertexBuffers[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding]->boundMem->size); +// fprintf(stderr, "vertexattrib offset %i\n", cb->graphicsPipeline->vertexAttributeDescriptions[c].offset); +// fprintf(stderr, "vertex offset %i\n", vertexOffset * stride); +// fprintf(stderr, "vertex buffer offset %i\n", cb->vertexBufferOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding]); +// fprintf(stderr, "bound offset %i\n", cb->vertexBuffers[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding]->boundOffset); +// fprintf(stderr, "format size %i\n", formatByteSize); +// fprintf(stderr, "stride %i\n", stride); + if(usedIndices < maxIndex) { maxIndex = usedIndices; @@ -275,8 +292,8 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset) vertexBuffer, //reloc address formatByteSize, stride, - cb->graphicsPipeline->vertexAttributeDescriptions[c].offset + vertexAttribOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding], //vertex vpm offset - cb->graphicsPipeline->vertexAttributeDescriptions[c].offset + vertexAttribOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding] //coordinte vpm offset + vertexAttribOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].location], //vertex vpm offset + coordAttribOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].location] //coordinte vpm offset ); } } diff --git a/driver/instance.c b/driver/instance.c index 8d35a0e..54783b8 100644 --- a/driver/instance.c +++ b/driver/instance.c @@ -203,31 +203,31 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateInstance( char IDstring[] = { 0, 0, 0, 0 }; memcpy(IDstring, &(*pInstance)->IDstrUINT, 3); - printf("------------------------------------------\n"); - printf("------------------------------------------\n"); - printf("V3D chip info: \n"); - printf("IDstring %s\n", IDstring); - printf("technologyVersion: %u\n", (*pInstance)->technologyVersion); - printf("v3dRevision %u\n", (*pInstance)->v3dRevision); - printf("vpmMemorySize %u\n", (*pInstance)->vpmMemorySize); - printf("numSemaphores %u\n", (*pInstance)->numSemaphores); - printf("numTMUperSlice %u\n", (*pInstance)->numTMUperSlice); - printf("numQPUperSlice %u\n", (*pInstance)->numQPUperSlice); - printf("numSlices %u\n", (*pInstance)->numSlices); - printf("tileBufferSize %s\n", (*pInstance)->tileBufferSize > 0 ? - (*pInstance)->tileBufferSize > 1 ? "full" : "half" : "quarter"); - printf("vriMemorySize %s\n", (*pInstance)->vriMemorySize ? "full" : "half"); - printf("hdrSupported %u\n", (*pInstance)->hdrSupported); - printf("tileBufferDoubleBufferModeSupported %u\n", (*pInstance)-> tileBufferDoubleBufferModeSupported); - printf("hasTiling %u\n", (*pInstance)->hasTiling); - printf("hasControlFlow %u\n", (*pInstance)->hasControlFlow); - printf("hasEtc1 %u\n", (*pInstance)->hasEtc1); - printf("hasThreadedFs %u\n", (*pInstance)->hasThreadedFs); - printf("hasMadvise %u\n", (*pInstance)->hasMadvise); - printf("hasPerfmon %u\n", (*pInstance)->hasPerfmon); - printf("hasFixedRCLorder %u\n", (*pInstance)->hasFixedRCLorder); - printf("------------------------------------------\n"); - printf("------------------------------------------\n"); +// printf("------------------------------------------\n"); +// printf("------------------------------------------\n"); +// printf("V3D chip info: \n"); +// printf("IDstring %s\n", IDstring); +// printf("technologyVersion: %u\n", (*pInstance)->technologyVersion); +// printf("v3dRevision %u\n", (*pInstance)->v3dRevision); +// printf("vpmMemorySize %u\n", (*pInstance)->vpmMemorySize); +// printf("numSemaphores %u\n", (*pInstance)->numSemaphores); +// printf("numTMUperSlice %u\n", (*pInstance)->numTMUperSlice); +// printf("numQPUperSlice %u\n", (*pInstance)->numQPUperSlice); +// printf("numSlices %u\n", (*pInstance)->numSlices); +// printf("tileBufferSize %s\n", (*pInstance)->tileBufferSize > 0 ? +// (*pInstance)->tileBufferSize > 1 ? "full" : "half" : "quarter"); +// printf("vriMemorySize %s\n", (*pInstance)->vriMemorySize ? "full" : "half"); +// printf("hdrSupported %u\n", (*pInstance)->hdrSupported); +// printf("tileBufferDoubleBufferModeSupported %u\n", (*pInstance)-> tileBufferDoubleBufferModeSupported); +// printf("hasTiling %u\n", (*pInstance)->hasTiling); +// printf("hasControlFlow %u\n", (*pInstance)->hasControlFlow); +// printf("hasEtc1 %u\n", (*pInstance)->hasEtc1); +// printf("hasThreadedFs %u\n", (*pInstance)->hasThreadedFs); +// printf("hasMadvise %u\n", (*pInstance)->hasMadvise); +// printf("hasPerfmon %u\n", (*pInstance)->hasPerfmon); +// printf("hasFixedRCLorder %u\n", (*pInstance)->hasFixedRCLorder); +// printf("------------------------------------------\n"); +// printf("------------------------------------------\n"); assert((*pInstance)->hasTiling); assert((*pInstance)->hasControlFlow); diff --git a/driver/kernelInterface.c b/driver/kernelInterface.c index 06e9c6a..c2d3678 100644 --- a/driver/kernelInterface.c +++ b/driver/kernelInterface.c @@ -303,7 +303,7 @@ int vc4_bo_wait(int fd, uint32_t bo, uint64_t timeout_ns) .timeout_ns = timeout_ns, }; - printf("Wait for BO: %u\n", bo); + //printf("Wait for BO: %u\n", bo); int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait); if (ret) { @@ -335,7 +335,7 @@ int vc4_seqno_wait(int fd, uint64_t* lastFinishedSeqno, uint64_t seqno, uint64_t .timeout_ns = *timeout_ns, }; - printf("Wait for seqno: %llu\n", seqno); + //printf("Wait for seqno: %llu\n", seqno); int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait); if (ret) { @@ -577,6 +577,7 @@ void vc4_cl_submit(int fd, struct drm_vc4_submit_cl* submit, uint64_t* lastEmitt fprintf(stderr, "Draw call returned %s. " "Expect corruption.\n", strerror(errno)); warned = 1; + assert(0); } else if (!ret) { *lastEmittedSeqno = submit->seqno; } diff --git a/driver/shader.c b/driver/shader.c index e6468dc..77f4386 100644 --- a/driver/shader.c +++ b/driver/shader.c @@ -37,6 +37,9 @@ VkResult rpi_vkCreateShaderModule(VkDevice device, const VkShaderModuleCreateInf } shader->hasThreadSwitch = 0; + shader->numVaryings = 0; + shader->numCoordVPMWrites = 0; + shader->numVertVPMWrites = 0; uint32_t hadVertex = 0, hadCoordinate = 0; @@ -64,42 +67,65 @@ VkResult rpi_vkCreateShaderModule(VkDevice device, const VkShaderModuleCreateInf break; } } + + for(uint64_t d = 0; d < ci->numInstructions[c]; ++d) + { + unsigned is_sem = ((ci->instructions[c][d] & (0x7fll << 57)) >> 57) == 0x74; + unsigned sig_bits = ((ci->instructions[c][d] & (0xfll << 60)) >> 60); + + //if it's an ALU instruction + if(!is_sem && sig_bits != 14 && sig_bits != 15) + { + unsigned raddr_a = ((ci->instructions[c][d] & (0x3fll << 18)) >> 18); + unsigned raddr_b = ((ci->instructions[c][d] & (0x3fll << 12)) >> 12); + + if(raddr_a == 35) + { + shader->numVaryings++; + } + + //don't count small immediates + if(sig_bits != 13 && raddr_b == 35) + { + shader->numVaryings++; + } + } + } } - shader->numVaryings = 0; - for(uint64_t d = 0; d < ci->numInstructions[c]; ++d) + + if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX || c == VK_RPI_ASSEMBLY_TYPE_COORDINATE) { - unsigned is_sem = ((ci->instructions[c][d] & (0x7fll << 57)) >> 57) == 0x74; - unsigned sig_bits = ((ci->instructions[c][d] & (0xfll << 60)) >> 60); - - //if it's an ALU instruction - if(!is_sem && sig_bits != 14 && sig_bits != 15) + for(uint64_t d = 0; d < ci->numInstructions[c]; ++d) { - unsigned raddr_a = ((ci->instructions[c][d] & (0x3fll << 18)) >> 18); - unsigned raddr_b = ((ci->instructions[c][d] & (0x3fll << 12)) >> 12); + unsigned waddr_add = ((ci->instructions[c][d] & (0x3fll << 38)) >> 38); + unsigned waddr_mul = ((ci->instructions[c][d] & (0x3fll << 32)) >> 32); - if(raddr_a == 35) + if(waddr_add == 48 || waddr_mul == 48) { - shader->numVaryings++; - } - - //don't count small immediates - if(sig_bits != 13 && raddr_b == 35) - { - shader->numVaryings++; + if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX) + { + shader->numVertVPMWrites++; + } + else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE) + { + shader->numCoordVPMWrites++; + } } } } shader->sizes[c] = ci->numInstructions[c]*sizeof(uint64_t); - + /** for(uint64_t e = 0; e < shader->sizes[c] / 8; ++e) { printf("%#llx ", ci->instructions[c][e]); disassemble_qpu_asm(ci->instructions[c][e]); } printf("\n"); + /**/ + shader->bos[c] = vc4_bo_alloc_shader(controlFd, ci->instructions[c], &shader->sizes[c]); } else