1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-02-19 16:54:18 +01:00

trying to get attrib setup finally right

This commit is contained in:
yours3lf 2020-05-01 19:38:13 +01:00
parent cae330c48e
commit 03f98aff82
7 changed files with 111 additions and 64 deletions

View File

@ -551,10 +551,10 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit(
printf("\nUniforms: ");
for(int d = 0; d < marker->uniformsSize / 4; ++d)
{
printf("%u ", *((uint32_t*)(marker->uniformsBuf)+d));
printf("%i ", *((uint32_t*)(marker->uniformsBuf)+d));
}
printf("\nShader recs: ");
uint8_t* ptr = marker->shaderRecBuf + (3 + 1) * 4;
uint8_t* ptr = marker->shaderRecBuf + (3 + 2) * 4;
for(int d = 0; d < marker->shaderRecCount; ++d)
{
uint8_t flags = *ptr;
@ -607,6 +607,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit(
numAttribs += (vertexAttribSelectBits & (1 << e)) >> e;
}
printf("\nnumattribs: %i", numAttribs);
for(uint8_t e = 0; e < numAttribs; ++e)
{
uint32_t attribBaseAddress = *(uint32_t*)ptr; ptr+=4;

View File

@ -252,6 +252,8 @@ typedef struct VkShaderModule_T
uint32_t numMappings[VK_RPI_ASSEMBLY_TYPE_MAX];
uint32_t hasThreadSwitch;
uint32_t numVaryings;
uint32_t numVertVPMWrites;
uint32_t numCoordVPMWrites;
} _shaderModule;
typedef struct VkDescriptorSetLayout_T

View File

@ -148,11 +148,11 @@ void createRendertarget(VkDevice device, uint32_t baseLayer, uint32_t baseMip, u
_image* img = textureImage;
VkFormat format = img->format;
printf("\nCopy Create RT\n");
printf("baseLayer %u\n", baseLayer);
printf("baseMip %u\n", baseMip);
printf("width %u\n", width);
printf("height %u\n", height);
// printf("\nCopy Create RT\n");
// printf("baseLayer %u\n", baseLayer);
// printf("baseMip %u\n", baseMip);
// printf("width %u\n", width);
// printf("height %u\n", height);
//we can't render to an ETC1 texture, so we'll just stick with RGBA8 for now
if(img->format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK)

View File

@ -184,11 +184,15 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
}
}
uint32_t attribSize = 0;
for(uint32_t c = 0; c < cb->graphicsPipeline->vertexAttributeDescriptionCount; ++c)
{
attribSize += getFormatBpp(cb->graphicsPipeline->vertexAttributeDescriptions[c].format) >> 3;
}
//TODO
//attrib size is simply how many times we wrote VPM x 4bytes minus the usual stuff (so 3x4bytes for Xs/Ys etc.)
//for CS it's always 12
//for attrib offsets
//vertex coords will obviously have offset 0
//coord offsets will always be 12 unless it's vertex coords then it's 0
//the rest:
//for VS we need to add the size of Xs/Ys, Zs, and 1/Wc (+point size if ever), so 3x4bytes
//number of attribs
//3 is the number of type of possible shaders
@ -210,24 +214,28 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
fragCode, //fragment code address
0, //TODO vertex number of used uniforms?
attribSelectBits, //vertex attribute array select bits
attribSize, //vertex total attribute size
vertModule->numVertVPMWrites * 4 - 12, //vertex total attribute size
0, //vertex uniform address
vertCode, //vertex shader code address
0, //TODO coordinate number of used uniforms?
//TODO how do we know which attribute contains the vertices?
//for now the first one will be hardcoded to have the vertices...
1 << 0, //coordinate attribute array select bits
getFormatBpp(cb->graphicsPipeline->vertexAttributeDescriptions[0].format) >> 3, //coordinate total attribute size
12, //coordinate total attribute size
0, //coordinate uniform address
coordCode //coordinate shader code address
);
uint32_t vertexAttribOffsets[8] = {};
for(uint32_t c = 0 ; c < 8; ++c)
uint32_t coordAttribOffsets[8] = {};
vertexAttribOffsets[1] = 12;
coordAttribOffsets[1] = 12;
for(uint32_t c = 2 ; c < 8; ++c)
{
for(uint32_t d = 0 ; d < cb->graphicsPipeline->vertexAttributeDescriptionCount; ++d)
coordAttribOffsets[c] = 12;
for(uint32_t d = 0; d < cb->graphicsPipeline->vertexAttributeDescriptionCount; ++d)
{
if(cb->graphicsPipeline->vertexAttributeDescriptions[d].binding < c)
if(cb->graphicsPipeline->vertexAttributeDescriptions[d].location < c && cb->graphicsPipeline->vertexAttributeDescriptions[d].location > 0)
{
vertexAttribOffsets[c] += cb->graphicsPipeline->vertexBindingDescriptions[cb->graphicsPipeline->vertexAttributeDescriptions[d].binding].stride;
}
@ -252,6 +260,15 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
- cb->vertexBuffers[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding]->boundOffset
- formatByteSize) / stride;
// fprintf(stderr, "usedIndices %i\n", usedIndices);
// fprintf(stderr, "boundMemsize %i\n", cb->vertexBuffers[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding]->boundMem->size);
// fprintf(stderr, "vertexattrib offset %i\n", cb->graphicsPipeline->vertexAttributeDescriptions[c].offset);
// fprintf(stderr, "vertex offset %i\n", vertexOffset * stride);
// fprintf(stderr, "vertex buffer offset %i\n", cb->vertexBufferOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding]);
// fprintf(stderr, "bound offset %i\n", cb->vertexBuffers[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding]->boundOffset);
// fprintf(stderr, "format size %i\n", formatByteSize);
// fprintf(stderr, "stride %i\n", stride);
if(usedIndices < maxIndex)
{
maxIndex = usedIndices;
@ -275,8 +292,8 @@ static uint32_t drawCommon(VkCommandBuffer commandBuffer, int32_t vertexOffset)
vertexBuffer, //reloc address
formatByteSize,
stride,
cb->graphicsPipeline->vertexAttributeDescriptions[c].offset + vertexAttribOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding], //vertex vpm offset
cb->graphicsPipeline->vertexAttributeDescriptions[c].offset + vertexAttribOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding] //coordinte vpm offset
vertexAttribOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].location], //vertex vpm offset
coordAttribOffsets[cb->graphicsPipeline->vertexAttributeDescriptions[c].location] //coordinte vpm offset
);
}
}

View File

@ -203,31 +203,31 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateInstance(
char IDstring[] = { 0, 0, 0, 0 };
memcpy(IDstring, &(*pInstance)->IDstrUINT, 3);
printf("------------------------------------------\n");
printf("------------------------------------------\n");
printf("V3D chip info: \n");
printf("IDstring %s\n", IDstring);
printf("technologyVersion: %u\n", (*pInstance)->technologyVersion);
printf("v3dRevision %u\n", (*pInstance)->v3dRevision);
printf("vpmMemorySize %u\n", (*pInstance)->vpmMemorySize);
printf("numSemaphores %u\n", (*pInstance)->numSemaphores);
printf("numTMUperSlice %u\n", (*pInstance)->numTMUperSlice);
printf("numQPUperSlice %u\n", (*pInstance)->numQPUperSlice);
printf("numSlices %u\n", (*pInstance)->numSlices);
printf("tileBufferSize %s\n", (*pInstance)->tileBufferSize > 0 ?
(*pInstance)->tileBufferSize > 1 ? "full" : "half" : "quarter");
printf("vriMemorySize %s\n", (*pInstance)->vriMemorySize ? "full" : "half");
printf("hdrSupported %u\n", (*pInstance)->hdrSupported);
printf("tileBufferDoubleBufferModeSupported %u\n", (*pInstance)-> tileBufferDoubleBufferModeSupported);
printf("hasTiling %u\n", (*pInstance)->hasTiling);
printf("hasControlFlow %u\n", (*pInstance)->hasControlFlow);
printf("hasEtc1 %u\n", (*pInstance)->hasEtc1);
printf("hasThreadedFs %u\n", (*pInstance)->hasThreadedFs);
printf("hasMadvise %u\n", (*pInstance)->hasMadvise);
printf("hasPerfmon %u\n", (*pInstance)->hasPerfmon);
printf("hasFixedRCLorder %u\n", (*pInstance)->hasFixedRCLorder);
printf("------------------------------------------\n");
printf("------------------------------------------\n");
// printf("------------------------------------------\n");
// printf("------------------------------------------\n");
// printf("V3D chip info: \n");
// printf("IDstring %s\n", IDstring);
// printf("technologyVersion: %u\n", (*pInstance)->technologyVersion);
// printf("v3dRevision %u\n", (*pInstance)->v3dRevision);
// printf("vpmMemorySize %u\n", (*pInstance)->vpmMemorySize);
// printf("numSemaphores %u\n", (*pInstance)->numSemaphores);
// printf("numTMUperSlice %u\n", (*pInstance)->numTMUperSlice);
// printf("numQPUperSlice %u\n", (*pInstance)->numQPUperSlice);
// printf("numSlices %u\n", (*pInstance)->numSlices);
// printf("tileBufferSize %s\n", (*pInstance)->tileBufferSize > 0 ?
// (*pInstance)->tileBufferSize > 1 ? "full" : "half" : "quarter");
// printf("vriMemorySize %s\n", (*pInstance)->vriMemorySize ? "full" : "half");
// printf("hdrSupported %u\n", (*pInstance)->hdrSupported);
// printf("tileBufferDoubleBufferModeSupported %u\n", (*pInstance)-> tileBufferDoubleBufferModeSupported);
// printf("hasTiling %u\n", (*pInstance)->hasTiling);
// printf("hasControlFlow %u\n", (*pInstance)->hasControlFlow);
// printf("hasEtc1 %u\n", (*pInstance)->hasEtc1);
// printf("hasThreadedFs %u\n", (*pInstance)->hasThreadedFs);
// printf("hasMadvise %u\n", (*pInstance)->hasMadvise);
// printf("hasPerfmon %u\n", (*pInstance)->hasPerfmon);
// printf("hasFixedRCLorder %u\n", (*pInstance)->hasFixedRCLorder);
// printf("------------------------------------------\n");
// printf("------------------------------------------\n");
assert((*pInstance)->hasTiling);
assert((*pInstance)->hasControlFlow);

View File

@ -303,7 +303,7 @@ int vc4_bo_wait(int fd, uint32_t bo, uint64_t timeout_ns)
.timeout_ns = timeout_ns,
};
printf("Wait for BO: %u\n", bo);
//printf("Wait for BO: %u\n", bo);
int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
if (ret) {
@ -335,7 +335,7 @@ int vc4_seqno_wait(int fd, uint64_t* lastFinishedSeqno, uint64_t seqno, uint64_t
.timeout_ns = *timeout_ns,
};
printf("Wait for seqno: %llu\n", seqno);
//printf("Wait for seqno: %llu\n", seqno);
int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
if (ret) {
@ -577,6 +577,7 @@ void vc4_cl_submit(int fd, struct drm_vc4_submit_cl* submit, uint64_t* lastEmitt
fprintf(stderr, "Draw call returned %s. "
"Expect corruption.\n", strerror(errno));
warned = 1;
assert(0);
} else if (!ret) {
*lastEmittedSeqno = submit->seqno;
}

View File

@ -37,6 +37,9 @@ VkResult rpi_vkCreateShaderModule(VkDevice device, const VkShaderModuleCreateInf
}
shader->hasThreadSwitch = 0;
shader->numVaryings = 0;
shader->numCoordVPMWrites = 0;
shader->numVertVPMWrites = 0;
uint32_t hadVertex = 0, hadCoordinate = 0;
@ -64,42 +67,65 @@ VkResult rpi_vkCreateShaderModule(VkDevice device, const VkShaderModuleCreateInf
break;
}
}
for(uint64_t d = 0; d < ci->numInstructions[c]; ++d)
{
unsigned is_sem = ((ci->instructions[c][d] & (0x7fll << 57)) >> 57) == 0x74;
unsigned sig_bits = ((ci->instructions[c][d] & (0xfll << 60)) >> 60);
//if it's an ALU instruction
if(!is_sem && sig_bits != 14 && sig_bits != 15)
{
unsigned raddr_a = ((ci->instructions[c][d] & (0x3fll << 18)) >> 18);
unsigned raddr_b = ((ci->instructions[c][d] & (0x3fll << 12)) >> 12);
if(raddr_a == 35)
{
shader->numVaryings++;
}
//don't count small immediates
if(sig_bits != 13 && raddr_b == 35)
{
shader->numVaryings++;
}
}
}
}
shader->numVaryings = 0;
for(uint64_t d = 0; d < ci->numInstructions[c]; ++d)
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX || c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
{
unsigned is_sem = ((ci->instructions[c][d] & (0x7fll << 57)) >> 57) == 0x74;
unsigned sig_bits = ((ci->instructions[c][d] & (0xfll << 60)) >> 60);
//if it's an ALU instruction
if(!is_sem && sig_bits != 14 && sig_bits != 15)
for(uint64_t d = 0; d < ci->numInstructions[c]; ++d)
{
unsigned raddr_a = ((ci->instructions[c][d] & (0x3fll << 18)) >> 18);
unsigned raddr_b = ((ci->instructions[c][d] & (0x3fll << 12)) >> 12);
unsigned waddr_add = ((ci->instructions[c][d] & (0x3fll << 38)) >> 38);
unsigned waddr_mul = ((ci->instructions[c][d] & (0x3fll << 32)) >> 32);
if(raddr_a == 35)
if(waddr_add == 48 || waddr_mul == 48)
{
shader->numVaryings++;
}
//don't count small immediates
if(sig_bits != 13 && raddr_b == 35)
{
shader->numVaryings++;
if(c == VK_RPI_ASSEMBLY_TYPE_VERTEX)
{
shader->numVertVPMWrites++;
}
else if(c == VK_RPI_ASSEMBLY_TYPE_COORDINATE)
{
shader->numCoordVPMWrites++;
}
}
}
}
shader->sizes[c] = ci->numInstructions[c]*sizeof(uint64_t);
/**
for(uint64_t e = 0; e < shader->sizes[c] / 8; ++e)
{
printf("%#llx ", ci->instructions[c][e]);
disassemble_qpu_asm(ci->instructions[c][e]);
}
printf("\n");
/**/
shader->bos[c] = vc4_bo_alloc_shader(controlFd, ci->instructions[c], &shader->sizes[c]);
}
else