diff --git a/driver/ControlListUtil.c b/driver/ControlListUtil.c index da6e069..abee839 100644 --- a/driver/ControlListUtil.c +++ b/driver/ControlListUtil.c @@ -115,7 +115,7 @@ void clInsertBranch(ControlList* cls, ControlListAddress address) assert(cls->nextFreeByte); *cls->nextFreeByte = V3D21_BRANCH_opcode; cls->nextFreeByte++; //TODO is this correct? - clEmitShaderRelocation(cls, &address); + //clEmitShaderRelocation(cls, &address); *(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4; } @@ -127,7 +127,7 @@ void clInsertBranchToSubList(ControlList* cls, ControlListAddress address) assert(cls->nextFreeByte); *cls->nextFreeByte = V3D21_BRANCH_TO_SUB_LIST_opcode; cls->nextFreeByte++; //TODO is this correct? - clEmitShaderRelocation(cls, &address); + //clEmitShaderRelocation(cls, &address); *(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4; } @@ -385,7 +385,7 @@ void clInsertConfigurationBits(ControlList* cl, moveBits(depthTestFunction, 3, 12) | moveBits(zUpdatesEnable, 1, 15) | moveBits(earlyZEnable, 1, 16) | - moveBits(earlyZUpdatesEnable, 1, 17); cl->nextFreeByte += 4; + moveBits(earlyZUpdatesEnable, 1, 17); cl->nextFreeByte += 3; } void clInsertFlatShadeFlags(ControlList* cl, @@ -454,15 +454,16 @@ void clInsertClipWindow(ControlList* cl, //viewport centre x/y coordinate void clInsertViewPortOffset(ControlList* cl, - uint32_t x, //sint16 - uint32_t y //sint16 + int16_t x, //sint16 + int16_t y //sint16 ) { assert(cl); assert(cl->buffer); assert(cl->nextFreeByte); *cl->nextFreeByte = V3D21_VIEWPORT_OFFSET_opcode; cl->nextFreeByte++; - *(uint32_t*)cl->nextFreeByte = moveBits(x, 16, 0) | moveBits(y, 16, 16); cl->nextFreeByte += 4; + *(int16_t*)cl->nextFreeByte = x * 16; cl->nextFreeByte += 2; + *(int16_t*)cl->nextFreeByte = y * 16; cl->nextFreeByte += 2; } void clInsertZMinMaxClippingPlanes(ControlList* cl, @@ -615,6 +616,8 @@ void clInsertGEMRelocations(ControlList* cl, //input: 2 cls (cl, handles cl) void 
clInsertShaderRecord(ControlList* cls, + ControlList* relocCl, + ControlList* handlesCl, uint32_t fragmentShaderIsSingleThreaded, //0/1 uint32_t pointSizeIncludedInShadedVertexData, //0/1 uint32_t enableClipping, //0/1 @@ -644,14 +647,14 @@ void clInsertShaderRecord(ControlList* cls, *cls->nextFreeByte = 0; cls->nextFreeByte++; *(uint16_t*)cls->nextFreeByte = moveBits(fragmentNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2; *cls->nextFreeByte = fragmentNumberOfVaryings; cls->nextFreeByte++; - clEmitShaderRelocation(cls, &fragmentCodeAddress); + clEmitShaderRelocation(relocCl, handlesCl, &fragmentCodeAddress); *(uint32_t*)cls->nextFreeByte = fragmentCodeAddress.offset; cls->nextFreeByte += 4; *(uint32_t*)cls->nextFreeByte = fragmentUniformsAddress; cls->nextFreeByte += 4; *(uint16_t*)cls->nextFreeByte = moveBits(vertexNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2; *cls->nextFreeByte = vertexAttributeArraySelectBits; cls->nextFreeByte++; *cls->nextFreeByte = vertexTotalAttributesSize; cls->nextFreeByte++; - clEmitShaderRelocation(cls, &vertexCodeAddress); + clEmitShaderRelocation(relocCl, handlesCl, &vertexCodeAddress); //TODO wtf??? 
*(uint32_t*)cls->nextFreeByte = moveBits(vertexCodeAddress.offset, 32, 0) | moveBits(vertexUniformsAddress, 32, 0); cls->nextFreeByte += 4; cls->nextFreeByte += 4; @@ -659,13 +662,15 @@ void clInsertShaderRecord(ControlList* cls, *(uint16_t*)cls->nextFreeByte = moveBits(coordinateNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2; *cls->nextFreeByte = coordinateAttributeArraySelectBits; cls->nextFreeByte++; *cls->nextFreeByte = coordinateTotalAttributesSize; cls->nextFreeByte++; - clEmitShaderRelocation(cls, &coordinateCodeAddress); + clEmitShaderRelocation(relocCl, handlesCl, &coordinateCodeAddress); *(uint32_t*)cls->nextFreeByte = coordinateCodeAddress.offset; cls->nextFreeByte += 4; *(uint32_t*)cls->nextFreeByte = coordinateUniformsAddress; cls->nextFreeByte += 4; } //input: 2 cls (cl, handles cl) void clInsertAttributeRecord(ControlList* cls, + ControlList* relocCl, + ControlList* handlesCl, ControlListAddress address, uint32_t sizeBytes, uint32_t stride, @@ -677,7 +682,7 @@ void clInsertAttributeRecord(ControlList* cls, assert(cls->nextFreeByte); uint32_t sizeBytesMinusOne = sizeBytes - 1; //TODO is this correct? 
- clEmitShaderRelocation(cls, &address); + clEmitShaderRelocation(relocCl, handlesCl, &address); *(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4; *cls->nextFreeByte = sizeBytesMinusOne; cls->nextFreeByte++; *cls->nextFreeByte = stride; cls->nextFreeByte++; @@ -708,21 +713,21 @@ uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handle) } //input: 2 cls (cl + handles cl) -inline void clEmitShaderRelocation(ControlList* cls, const ControlListAddress* address) +inline void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, const ControlListAddress* address) { - assert(cls); - assert(cls->buffer); - assert(cls->nextFreeByte); + assert(relocCl); + assert(relocCl->buffer); + assert(relocCl->nextFreeByte); + assert(handlesCl); + assert(handlesCl->buffer); + assert(handlesCl->nextFreeByte); assert(address); assert(address->handle); - //search for handle in handles cl - //if found insert handle index - - ControlList* cl = cls; - ControlList* handlesCl = cls + 1; - //store offset within handles in cl - *(uint32_t*)cl->nextFreeByte = clGetHandleIndex(handlesCl, address->handle); - cl->nextFreeByte += 4; + *(uint32_t*)relocCl->nextFreeByte = clGetHandleIndex(handlesCl, address->handle); + relocCl->nextFreeByte += 4; } + +inline void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address) +{} diff --git a/driver/ControlListUtil.h b/driver/ControlListUtil.h index be0075e..0a4df1e 100644 --- a/driver/ControlListUtil.h +++ b/driver/ControlListUtil.h @@ -21,12 +21,13 @@ typedef struct ControlList uint8_t* nextFreeByte; //pointer to the next available free byte } ControlList; -void clEmitShaderRelocation(ControlList* cl, const ControlListAddress* address); +void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, const ControlListAddress* address); +void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address); #define __gen_user_data struct ControlList #define __gen_address_type 
ControlListAddress #define __gen_address_offset(reloc) ((reloc)->offset) -#define __gen_emit_reloc clEmitShaderRelocation +#define __gen_emit_reloc clDummyRelocation #include "brcm/cle/v3d_packet_v21_pack.h" @@ -95,8 +96,8 @@ void clInsertClipWindow(ControlList* cl, uint32_t bottomPixelCoord, //uint16 uint32_t leftPixelCoord); //uint16 void clInsertViewPortOffset(ControlList* cl, - uint32_t x, //sint16 - uint32_t y //sint16 + int16_t x, //sint16 + int16_t y //sint16 ); void clInsertZMinMaxClippingPlanes(ControlList* cl, float minZw, @@ -127,6 +128,8 @@ void clInsertGEMRelocations(ControlList* cl, uint32_t buffer0, uint32_t buffer1); void clInsertShaderRecord(ControlList* cls, + ControlList* relocCl, + ControlList* handlesCl, uint32_t fragmentShaderIsSingleThreaded, //0/1 uint32_t pointSizeIncludedInShadedVertexData, //0/1 uint32_t enableClipping, //0/1 @@ -145,6 +148,8 @@ void clInsertShaderRecord(ControlList* cls, uint32_t coordinateUniformsAddress, ControlListAddress coordinateCodeAddress); void clInsertAttributeRecord(ControlList* cls, + ControlList* relocCl, + ControlList* handlesCl, ControlListAddress address, uint32_t sizeBytes, uint32_t stride, diff --git a/driver/common.c b/driver/common.c index 58c9ff3..4f7755b 100644 --- a/driver/common.c +++ b/driver/common.c @@ -503,6 +503,13 @@ uint32_t getFormatByteSize(VkFormat format) } } +uint32_t ulog2(uint32_t v) +{ + uint32_t ret = 0; + while(v >>= 1) ret++; + return ret; +} + /* * https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdDraw */ @@ -547,7 +554,7 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins 1, //TODO earlyz updates 0, //TODO earlyz enable 0, //TODO z updates - getDepthCompareOp(cb->graphicsPipeline->depthCompareOp), //depth compare func + cb->graphicsPipeline->depthTestEnable ? 
getDepthCompareOp(cb->graphicsPipeline->depthCompareOp) : V3D_COMPARE_FUNC_ALWAYS, //depth compare func 0, 0, 0, @@ -555,8 +562,8 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins 0, cb->graphicsPipeline->depthBiasEnable, //depth offset enable cb->graphicsPipeline->frontFace == VK_FRONT_FACE_CLOCKWISE, //clockwise - cb->graphicsPipeline->cullMode & VK_CULL_MODE_BACK_BIT, //enable back facing primitives - cb->graphicsPipeline->cullMode & VK_CULL_MODE_FRONT_BIT); //enable front facing primitives + !(cb->graphicsPipeline->cullMode & VK_CULL_MODE_BACK_BIT), //enable back facing primitives + !(cb->graphicsPipeline->cullMode & VK_CULL_MODE_FRONT_BIT)); //enable front facing primitives //TODO Depth Offset clFit(commandBuffer, &commandBuffer->binCl, V3D21_DEPTH_OFFSET_length); @@ -603,23 +610,33 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins //emit shader record ControlListAddress fragCode = { - .handle = ((_shaderModule*)(cb->graphicsPipeline->modules[VK_SHADER_STAGE_FRAGMENT_BIT]))->bos[VK_RPI_ASSEMBLY_TYPE_FRAGMENT], + .handle = ((_shaderModule*)(cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_FRAGMENT_BIT)]))->bos[VK_RPI_ASSEMBLY_TYPE_FRAGMENT], .offset = 0, }; ControlListAddress vertCode = { - .handle = ((_shaderModule*)(cb->graphicsPipeline->modules[VK_SHADER_STAGE_VERTEX_BIT]))->bos[VK_RPI_ASSEMBLY_TYPE_VERTEX], + .handle = ((_shaderModule*)(cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_VERTEX_BIT)]))->bos[VK_RPI_ASSEMBLY_TYPE_VERTEX], .offset = 0, }; ControlListAddress coordCode = { - .handle = ((_shaderModule*)(cb->graphicsPipeline->modules[VK_SHADER_STAGE_VERTEX_BIT]))->bos[VK_RPI_ASSEMBLY_TYPE_COORDINATE], + .handle = ((_shaderModule*)(cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_VERTEX_BIT)]))->bos[VK_RPI_ASSEMBLY_TYPE_COORDINATE], .offset = 0, }; //TODO + commandBuffer->shaderRecCount++; clFit(commandBuffer, &commandBuffer->shaderRecCl, V3D21_SHADER_RECORD_length); + 
ControlList relocCl = commandBuffer->shaderRecCl; + //TODO number of attribs + int numAttribs = 1; + for(int c = 0; c < (3 + numAttribs)*4; ++c) + { + clInsertNop(&commandBuffer->shaderRecCl); + } clInsertShaderRecord(&commandBuffer->shaderRecCl, + &relocCl, + &commandBuffer->handlesCl, 0, //single threaded? 0, //point size included in shaded vertex data? 0, //enable clipping? @@ -628,13 +645,13 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins 0, //fragment uniform address? fragCode, //fragment code address 0, //vertex number of unused uniforms? - 1, //vertex attribute array select bits - 1, //vertex total attribute size + 1, //TODO vertex attribute array select bits + 1, //TODO vertex total attribute size 0, //vertex uniform address vertCode, //vertex shader code address 0, //coordinate number of unused uniforms? - 1, //coordinate attribute array select bits - 1, //coordinate total attribute size + 1, //TODO coordinate attribute array select bits + 1, //TODO coordinate total attribute size 0, //coordinate uniform address coordCode //coordinate shader code address ); @@ -646,6 +663,8 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins clFit(commandBuffer, &commandBuffer->shaderRecCl, V3D21_ATTRIBUTE_RECORD_length); clInsertAttributeRecord(&commandBuffer->shaderRecCl, + &relocCl, + &commandBuffer->handlesCl, vertexBuffer, //address getFormatByteSize(cb->graphicsPipeline->vertexAttributeDescriptions[0].format), cb->graphicsPipeline->vertexBindingDescriptions[0].stride, //stride @@ -654,16 +673,16 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins ); //insert vertex buffer handle - clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t vboIdx = clGetHandleIndex(&commandBuffer->handlesCl, vertexBuffer.handle); + //clFit(commandBuffer, &commandBuffer->handlesCl, 4); + //uint32_t vboIdx = clGetHandleIndex(&commandBuffer->handlesCl, vertexBuffer.handle); //insert 
shader code handles - clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t vertIdx = clGetHandleIndex(&commandBuffer->handlesCl, vertCode.handle); - clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t coordIdx = clGetHandleIndex(&commandBuffer->handlesCl, coordCode.handle); - clFit(commandBuffer, &commandBuffer->handlesCl, 4); - uint32_t fragIdx = clGetHandleIndex(&commandBuffer->handlesCl, fragCode.handle); + //clFit(commandBuffer, &commandBuffer->handlesCl, 4); + //uint32_t vertIdx = clGetHandleIndex(&commandBuffer->handlesCl, vertCode.handle); + //clFit(commandBuffer, &commandBuffer->handlesCl, 4); + //uint32_t coordIdx = clGetHandleIndex(&commandBuffer->handlesCl, coordCode.handle); + //clFit(commandBuffer, &commandBuffer->handlesCl, 4); + //uint32_t fragIdx = clGetHandleIndex(&commandBuffer->handlesCl, fragCode.handle); //Insert image handle index clFit(commandBuffer, &commandBuffer->handlesCl, 4); @@ -914,7 +933,10 @@ VkResult vkCreateFramebuffer(VkDevice device, const VkFramebufferCreateInfo* pCr return VK_ERROR_OUT_OF_HOST_MEMORY; } - memcpy(fb->attachmentViews, pCreateInfo->pAttachments, sizeof(_imageView) * fb->numAttachmentViews); + for(int c = 0; c < fb->numAttachmentViews; ++c) + { + memcpy(&fb->attachmentViews[c], pCreateInfo->pAttachments[c], sizeof(_imageView)); + } fb->width = pCreateInfo->width; fb->height = pCreateInfo->height; @@ -971,13 +993,6 @@ VkResult vkCreateShaderModule(VkDevice device, const VkShaderModuleCreateInfo* p return VK_SUCCESS; } -uint32_t ulog2(uint32_t v) -{ - uint32_t ret = 0; - while(v >>= 1) ret++; - return ret; -} - /* * https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateGraphicsPipelines */ diff --git a/test/triangle/triangle.cpp b/test/triangle/triangle.cpp index 529f8fa..e1a94f0 100644 --- a/test/triangle/triangle.cpp +++ b/test/triangle/triangle.cpp @@ -17,8 +17,8 @@ //GLFWwindow * window; -#define WINDOW_WIDTH 640 -#define WINDOW_HEIGHT 480 +//#define 
WINDOW_WIDTH 640 +//#define WINDOW_HEIGHT 480 const char* fragShader = "#version 100\n" @@ -110,6 +110,7 @@ VkDeviceMemory vertexBufferMemory; VkPhysicalDeviceMemoryProperties pdmp; std::vector views; //? VkSurfaceFormatKHR swapchainFormat; +VkExtent2D swapChainExtent; uint32_t graphicsQueueFamily; uint32_t presentQueueFamily; @@ -186,7 +187,7 @@ void setupVulkan() { void mainLoop() { //while (!glfwWindowShouldClose(window)) { - for(int c = 0; c < 10; ++c){ + for(int c = 0; c < 1; ++c){ draw(); //glfwPollEvents(); @@ -477,7 +478,7 @@ void createSwapChain() { swapchainFormat = chooseSurfaceFormat(surfaceFormats); // Select swap chain size - VkExtent2D swapChainExtent = chooseSwapExtent(surfaceCapabilities); + swapChainExtent = chooseSwapExtent(surfaceCapabilities); // Check if swap chain supports being the destination of an image transfer // Note: AMD driver bug, though it would be nice to implement a workaround that doesn't use transfering @@ -568,8 +569,8 @@ VkExtent2D chooseSwapExtent(const VkSurfaceCapabilitiesKHR& surfaceCapabilities) #define min(a, b) (a < b ? a : b) #define max(a, b) (a > b ? 
a : b) - swapChainExtent.width = min(max(WINDOW_WIDTH, surfaceCapabilities.minImageExtent.width), surfaceCapabilities.maxImageExtent.width); - swapChainExtent.height = min(max(WINDOW_HEIGHT, surfaceCapabilities.minImageExtent.height), surfaceCapabilities.maxImageExtent.height); + swapChainExtent.width = min(max(640, surfaceCapabilities.minImageExtent.width), surfaceCapabilities.maxImageExtent.width); + swapChainExtent.height = min(max(480, surfaceCapabilities.minImageExtent.height), surfaceCapabilities.maxImageExtent.height); return swapChainExtent; } @@ -650,20 +651,20 @@ void recordCommandBuffers() renderPassInfo.renderPass = renderPass; renderPassInfo.renderArea.offset.x = 0; renderPassInfo.renderArea.offset.y = 0; - renderPassInfo.renderArea.extent.width = WINDOW_WIDTH; - renderPassInfo.renderArea.extent.height = WINDOW_HEIGHT; + renderPassInfo.renderArea.extent.width = swapChainExtent.width; + renderPassInfo.renderArea.extent.height = swapChainExtent.height; renderPassInfo.clearValueCount = 1; renderPassInfo.pClearValues = &clearValue; VkViewport viewport = { 0 }; - viewport.height = (float)WINDOW_HEIGHT; - viewport.width = (float)WINDOW_WIDTH; + viewport.height = (float)swapChainExtent.height; + viewport.width = (float)swapChainExtent.width; viewport.minDepth = (float)0.0f; viewport.maxDepth = (float)1.0f; VkRect2D scissor = { 0 }; - scissor.extent.width = WINDOW_WIDTH; - scissor.extent.height = WINDOW_HEIGHT; + scissor.extent.width = swapChainExtent.width; + scissor.extent.height = swapChainExtent.height; scissor.offset.x = 0; scissor.offset.y = 0; @@ -815,8 +816,8 @@ void CreateFramebuffer() fbCreateInfo.renderPass = renderPass; fbCreateInfo.attachmentCount = 1; fbCreateInfo.pAttachments = &views[i]; - fbCreateInfo.width = WINDOW_WIDTH; - fbCreateInfo.height = WINDOW_HEIGHT; + fbCreateInfo.width = swapChainExtent.width; + fbCreateInfo.height = swapChainExtent.height; fbCreateInfo.layers = 1; res = vkCreateFramebuffer(device, &fbCreateInfo, NULL, &fbs[i]); 
@@ -944,8 +945,8 @@ void CreatePipeline() VkViewport vp = {}; vp.x = 0.0f; vp.y = 0.0f; - vp.width = (float)WINDOW_WIDTH; - vp.height = (float)WINDOW_HEIGHT; + vp.width = (float)swapChainExtent.width; + vp.height = (float)swapChainExtent.height; vp.minDepth = 0.0f; vp.maxDepth = 1.0f;