1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-01-30 22:52:14 +01:00

fixed shader state count

This commit is contained in:
Unknown 2018-10-14 11:12:17 +01:00
parent c15e06358e
commit 25c1b9b2f3
4 changed files with 94 additions and 68 deletions

View File

@ -115,7 +115,7 @@ void clInsertBranch(ControlList* cls, ControlListAddress address)
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_BRANCH_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
//clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
}
@ -127,7 +127,7 @@ void clInsertBranchToSubList(ControlList* cls, ControlListAddress address)
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_BRANCH_TO_SUB_LIST_opcode; cls->nextFreeByte++;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
//clEmitShaderRelocation(cls, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
}
@ -385,7 +385,7 @@ void clInsertConfigurationBits(ControlList* cl,
moveBits(depthTestFunction, 3, 12) |
moveBits(zUpdatesEnable, 1, 15) |
moveBits(earlyZEnable, 1, 16) |
moveBits(earlyZUpdatesEnable, 1, 17); cl->nextFreeByte += 4;
moveBits(earlyZUpdatesEnable, 1, 17); cl->nextFreeByte += 3;
}
void clInsertFlatShadeFlags(ControlList* cl,
@ -454,15 +454,16 @@ void clInsertClipWindow(ControlList* cl,
//viewport centre x/y coordinate
void clInsertViewPortOffset(ControlList* cl,
uint32_t x, //sint16
uint32_t y //sint16
int16_t x, //sint16
int16_t y //sint16
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_VIEWPORT_OFFSET_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = moveBits(x, 16, 0) | moveBits(y, 16, 16); cl->nextFreeByte += 4;
*(int16_t*)cl->nextFreeByte = x * 16; cl->nextFreeByte += 2;
*(int16_t*)cl->nextFreeByte = y * 16; cl->nextFreeByte += 2;
}
void clInsertZMinMaxClippingPlanes(ControlList* cl,
@ -615,6 +616,8 @@ void clInsertGEMRelocations(ControlList* cl,
//input: 2 cls (cl, handles cl)
void clInsertShaderRecord(ControlList* cls,
ControlList* relocCl,
ControlList* handlesCl,
uint32_t fragmentShaderIsSingleThreaded, //0/1
uint32_t pointSizeIncludedInShadedVertexData, //0/1
uint32_t enableClipping, //0/1
@ -644,14 +647,14 @@ void clInsertShaderRecord(ControlList* cls,
*cls->nextFreeByte = 0; cls->nextFreeByte++;
*(uint16_t*)cls->nextFreeByte = moveBits(fragmentNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2;
*cls->nextFreeByte = fragmentNumberOfVaryings; cls->nextFreeByte++;
clEmitShaderRelocation(cls, &fragmentCodeAddress);
clEmitShaderRelocation(relocCl, handlesCl, &fragmentCodeAddress);
*(uint32_t*)cls->nextFreeByte = fragmentCodeAddress.offset; cls->nextFreeByte += 4;
*(uint32_t*)cls->nextFreeByte = fragmentUniformsAddress; cls->nextFreeByte += 4;
*(uint16_t*)cls->nextFreeByte = moveBits(vertexNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2;
*cls->nextFreeByte = vertexAttributeArraySelectBits; cls->nextFreeByte++;
*cls->nextFreeByte = vertexTotalAttributesSize; cls->nextFreeByte++;
clEmitShaderRelocation(cls, &vertexCodeAddress);
clEmitShaderRelocation(relocCl, handlesCl, &vertexCodeAddress);
//TODO wtf???
*(uint32_t*)cls->nextFreeByte = moveBits(vertexCodeAddress.offset, 32, 0) | moveBits(vertexUniformsAddress, 32, 0); cls->nextFreeByte += 4;
cls->nextFreeByte += 4;
@ -659,13 +662,15 @@ void clInsertShaderRecord(ControlList* cls,
*(uint16_t*)cls->nextFreeByte = moveBits(coordinateNumberOfUnusedUniforms, 16, 0); cls->nextFreeByte += 2;
*cls->nextFreeByte = coordinateAttributeArraySelectBits; cls->nextFreeByte++;
*cls->nextFreeByte = coordinateTotalAttributesSize; cls->nextFreeByte++;
clEmitShaderRelocation(cls, &coordinateCodeAddress);
clEmitShaderRelocation(relocCl, handlesCl, &coordinateCodeAddress);
*(uint32_t*)cls->nextFreeByte = coordinateCodeAddress.offset; cls->nextFreeByte += 4;
*(uint32_t*)cls->nextFreeByte = coordinateUniformsAddress; cls->nextFreeByte += 4;
}
//input: 2 cls (cl, handles cl)
void clInsertAttributeRecord(ControlList* cls,
ControlList* relocCl,
ControlList* handlesCl,
ControlListAddress address,
uint32_t sizeBytes,
uint32_t stride,
@ -677,7 +682,7 @@ void clInsertAttributeRecord(ControlList* cls,
assert(cls->nextFreeByte);
uint32_t sizeBytesMinusOne = sizeBytes - 1;
//TODO is this correct?
clEmitShaderRelocation(cls, &address);
clEmitShaderRelocation(relocCl, handlesCl, &address);
*(uint32_t*)cls->nextFreeByte = address.offset; cls->nextFreeByte += 4;
*cls->nextFreeByte = sizeBytesMinusOne; cls->nextFreeByte++;
*cls->nextFreeByte = stride; cls->nextFreeByte++;
@ -708,21 +713,21 @@ uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handle)
}
//input: 2 cls (cl + handles cl)
inline void clEmitShaderRelocation(ControlList* cls, const ControlListAddress* address)
inline void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, const ControlListAddress* address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
assert(relocCl);
assert(relocCl->buffer);
assert(relocCl->nextFreeByte);
assert(handlesCl);
assert(handlesCl->buffer);
assert(handlesCl->nextFreeByte);
assert(address);
assert(address->handle);
//search for handle in handles cl
//if found insert handle index
ControlList* cl = cls;
ControlList* handlesCl = cls + 1;
//store offset within handles in cl
*(uint32_t*)cl->nextFreeByte = clGetHandleIndex(handlesCl, address->handle);
cl->nextFreeByte += 4;
*(uint32_t*)relocCl->nextFreeByte = clGetHandleIndex(handlesCl, address->handle);
relocCl->nextFreeByte += 4;
}
// No-op relocation callback: accepts a relocation control list and an address
// and deliberately does nothing with either of them.
inline void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address)
{
    // Both parameters are intentionally unused.
    (void)relocCl;
    (void)address;
}

View File

@ -21,12 +21,13 @@ typedef struct ControlList
uint8_t* nextFreeByte; //pointer to the next available free byte
} ControlList;
void clEmitShaderRelocation(ControlList* cl, const ControlListAddress* address);
void clEmitShaderRelocation(ControlList* relocCl, ControlList* handlesCl, const ControlListAddress* address);
void clDummyRelocation(ControlList* relocCl, const ControlListAddress* address);
#define __gen_user_data struct ControlList
#define __gen_address_type ControlListAddress
#define __gen_address_offset(reloc) ((reloc)->offset)
#define __gen_emit_reloc clEmitShaderRelocation
#define __gen_emit_reloc clDummyRelocation
#include "brcm/cle/v3d_packet_v21_pack.h"
@ -95,8 +96,8 @@ void clInsertClipWindow(ControlList* cl,
uint32_t bottomPixelCoord, //uint16
uint32_t leftPixelCoord); //uint16
void clInsertViewPortOffset(ControlList* cl,
uint32_t x, //sint16
uint32_t y //sint16
int16_t x, //sint16
int16_t y //sint16
);
void clInsertZMinMaxClippingPlanes(ControlList* cl,
float minZw,
@ -127,6 +128,8 @@ void clInsertGEMRelocations(ControlList* cl,
uint32_t buffer0,
uint32_t buffer1);
void clInsertShaderRecord(ControlList* cls,
ControlList* relocCl,
ControlList* handlesCl,
uint32_t fragmentShaderIsSingleThreaded, //0/1
uint32_t pointSizeIncludedInShadedVertexData, //0/1
uint32_t enableClipping, //0/1
@ -145,6 +148,8 @@ void clInsertShaderRecord(ControlList* cls,
uint32_t coordinateUniformsAddress,
ControlListAddress coordinateCodeAddress);
void clInsertAttributeRecord(ControlList* cls,
ControlList* relocCl,
ControlList* handlesCl,
ControlListAddress address,
uint32_t sizeBytes,
uint32_t stride,

View File

@ -503,6 +503,13 @@ uint32_t getFormatByteSize(VkFormat format)
}
}
// Integer base-2 logarithm, rounded down: returns the index of the highest
// set bit of v. Both v == 0 and v == 1 yield 0.
uint32_t ulog2(uint32_t v)
{
    uint32_t exponent = 0;
    // Count how many times v can be shifted right before it becomes zero,
    // not counting the first bit position.
    for (uint32_t rest = v >> 1; rest != 0; rest >>= 1)
    {
        exponent++;
    }
    return exponent;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdDraw
*/
@ -547,7 +554,7 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
1, //TODO earlyz updates
0, //TODO earlyz enable
0, //TODO z updates
getDepthCompareOp(cb->graphicsPipeline->depthCompareOp), //depth compare func
cb->graphicsPipeline->depthTestEnable ? getDepthCompareOp(cb->graphicsPipeline->depthCompareOp) : V3D_COMPARE_FUNC_ALWAYS, //depth compare func
0,
0,
0,
@ -555,8 +562,8 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
0,
cb->graphicsPipeline->depthBiasEnable, //depth offset enable
cb->graphicsPipeline->frontFace == VK_FRONT_FACE_CLOCKWISE, //clockwise
cb->graphicsPipeline->cullMode & VK_CULL_MODE_BACK_BIT, //enable back facing primitives
cb->graphicsPipeline->cullMode & VK_CULL_MODE_FRONT_BIT); //enable front facing primitives
!(cb->graphicsPipeline->cullMode & VK_CULL_MODE_BACK_BIT), //enable back facing primitives
!(cb->graphicsPipeline->cullMode & VK_CULL_MODE_FRONT_BIT)); //enable front facing primitives
//TODO Depth Offset
clFit(commandBuffer, &commandBuffer->binCl, V3D21_DEPTH_OFFSET_length);
@ -603,23 +610,33 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
//emit shader record
ControlListAddress fragCode = {
.handle = ((_shaderModule*)(cb->graphicsPipeline->modules[VK_SHADER_STAGE_FRAGMENT_BIT]))->bos[VK_RPI_ASSEMBLY_TYPE_FRAGMENT],
.handle = ((_shaderModule*)(cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_FRAGMENT_BIT)]))->bos[VK_RPI_ASSEMBLY_TYPE_FRAGMENT],
.offset = 0,
};
ControlListAddress vertCode = {
.handle = ((_shaderModule*)(cb->graphicsPipeline->modules[VK_SHADER_STAGE_VERTEX_BIT]))->bos[VK_RPI_ASSEMBLY_TYPE_VERTEX],
.handle = ((_shaderModule*)(cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_VERTEX_BIT)]))->bos[VK_RPI_ASSEMBLY_TYPE_VERTEX],
.offset = 0,
};
ControlListAddress coordCode = {
.handle = ((_shaderModule*)(cb->graphicsPipeline->modules[VK_SHADER_STAGE_VERTEX_BIT]))->bos[VK_RPI_ASSEMBLY_TYPE_COORDINATE],
.handle = ((_shaderModule*)(cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_VERTEX_BIT)]))->bos[VK_RPI_ASSEMBLY_TYPE_COORDINATE],
.offset = 0,
};
//TODO
commandBuffer->shaderRecCount++;
clFit(commandBuffer, &commandBuffer->shaderRecCl, V3D21_SHADER_RECORD_length);
ControlList relocCl = commandBuffer->shaderRecCl;
//TODO number of attribs
int numAttribs = 1;
for(int c = 0; c < (3 + numAttribs)*4; ++c)
{
clInsertNop(&commandBuffer->shaderRecCl);
}
clInsertShaderRecord(&commandBuffer->shaderRecCl,
&relocCl,
&commandBuffer->handlesCl,
0, //single threaded?
0, //point size included in shaded vertex data?
0, //enable clipping?
@ -628,13 +645,13 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
0, //fragment uniform address?
fragCode, //fragment code address
0, //vertex number of unused uniforms?
1, //vertex attribute array select bits
1, //vertex total attribute size
1, //TODO vertex attribute array select bits
1, //TODO vertex total attribute size
0, //vertex uniform address
vertCode, //vertex shader code address
0, //coordinate number of unused uniforms?
1, //coordinate attribute array select bits
1, //coordinate total attribute size
1, //TODO coordinate attribute array select bits
1, //TODO coordinate total attribute size
0, //coordinate uniform address
coordCode //coordinate shader code address
);
@ -646,6 +663,8 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
clFit(commandBuffer, &commandBuffer->shaderRecCl, V3D21_ATTRIBUTE_RECORD_length);
clInsertAttributeRecord(&commandBuffer->shaderRecCl,
&relocCl,
&commandBuffer->handlesCl,
vertexBuffer, //address
getFormatByteSize(cb->graphicsPipeline->vertexAttributeDescriptions[0].format),
cb->graphicsPipeline->vertexBindingDescriptions[0].stride, //stride
@ -654,16 +673,16 @@ void vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t ins
);
//insert vertex buffer handle
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t vboIdx = clGetHandleIndex(&commandBuffer->handlesCl, vertexBuffer.handle);
//clFit(commandBuffer, &commandBuffer->handlesCl, 4);
//uint32_t vboIdx = clGetHandleIndex(&commandBuffer->handlesCl, vertexBuffer.handle);
//insert shader code handles
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t vertIdx = clGetHandleIndex(&commandBuffer->handlesCl, vertCode.handle);
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t coordIdx = clGetHandleIndex(&commandBuffer->handlesCl, coordCode.handle);
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t fragIdx = clGetHandleIndex(&commandBuffer->handlesCl, fragCode.handle);
//clFit(commandBuffer, &commandBuffer->handlesCl, 4);
//uint32_t vertIdx = clGetHandleIndex(&commandBuffer->handlesCl, vertCode.handle);
//clFit(commandBuffer, &commandBuffer->handlesCl, 4);
//uint32_t coordIdx = clGetHandleIndex(&commandBuffer->handlesCl, coordCode.handle);
//clFit(commandBuffer, &commandBuffer->handlesCl, 4);
//uint32_t fragIdx = clGetHandleIndex(&commandBuffer->handlesCl, fragCode.handle);
//Insert image handle index
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
@ -914,7 +933,10 @@ VkResult vkCreateFramebuffer(VkDevice device, const VkFramebufferCreateInfo* pCr
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(fb->attachmentViews, pCreateInfo->pAttachments, sizeof(_imageView) * fb->numAttachmentViews);
for(int c = 0; c < fb->numAttachmentViews; ++c)
{
memcpy(&fb->attachmentViews[c], pCreateInfo->pAttachments[c], sizeof(_imageView));
}
fb->width = pCreateInfo->width;
fb->height = pCreateInfo->height;
@ -971,13 +993,6 @@ VkResult vkCreateShaderModule(VkDevice device, const VkShaderModuleCreateInfo* p
return VK_SUCCESS;
}
// Returns floor(log2(v)) for v >= 1, i.e. the position of the most
// significant set bit. An input of 0 also returns 0.
uint32_t ulog2(uint32_t v)
{
    uint32_t shifts = 0;
    // Halve v until only the top bit (or nothing) remains.
    while (v > 1)
    {
        v >>= 1;
        shifts++;
    }
    return shifts;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateGraphicsPipelines
*/

View File

@ -17,8 +17,8 @@
//GLFWwindow * window;
#define WINDOW_WIDTH 640
#define WINDOW_HEIGHT 480
//#define WINDOW_WIDTH 640
//#define WINDOW_HEIGHT 480
const char* fragShader =
"#version 100\n"
@ -110,6 +110,7 @@ VkDeviceMemory vertexBufferMemory;
VkPhysicalDeviceMemoryProperties pdmp;
std::vector<VkImageView> views; //?
VkSurfaceFormatKHR swapchainFormat;
VkExtent2D swapChainExtent;
uint32_t graphicsQueueFamily;
uint32_t presentQueueFamily;
@ -186,7 +187,7 @@ void setupVulkan() {
void mainLoop() {
//while (!glfwWindowShouldClose(window)) {
for(int c = 0; c < 10; ++c){
for(int c = 0; c < 1; ++c){
draw();
//glfwPollEvents();
@ -477,7 +478,7 @@ void createSwapChain() {
swapchainFormat = chooseSurfaceFormat(surfaceFormats);
// Select swap chain size
VkExtent2D swapChainExtent = chooseSwapExtent(surfaceCapabilities);
swapChainExtent = chooseSwapExtent(surfaceCapabilities);
// Check if swap chain supports being the destination of an image transfer
// Note: AMD driver bug, though it would be nice to implement a workaround that doesn't use transferring
@ -568,8 +569,8 @@ VkExtent2D chooseSwapExtent(const VkSurfaceCapabilitiesKHR& surfaceCapabilities)
#define min(a, b) (a < b ? a : b)
#define max(a, b) (a > b ? a : b)
swapChainExtent.width = min(max(WINDOW_WIDTH, surfaceCapabilities.minImageExtent.width), surfaceCapabilities.maxImageExtent.width);
swapChainExtent.height = min(max(WINDOW_HEIGHT, surfaceCapabilities.minImageExtent.height), surfaceCapabilities.maxImageExtent.height);
swapChainExtent.width = min(max(640, surfaceCapabilities.minImageExtent.width), surfaceCapabilities.maxImageExtent.width);
swapChainExtent.height = min(max(480, surfaceCapabilities.minImageExtent.height), surfaceCapabilities.maxImageExtent.height);
return swapChainExtent;
}
@ -650,20 +651,20 @@ void recordCommandBuffers()
renderPassInfo.renderPass = renderPass;
renderPassInfo.renderArea.offset.x = 0;
renderPassInfo.renderArea.offset.y = 0;
renderPassInfo.renderArea.extent.width = WINDOW_WIDTH;
renderPassInfo.renderArea.extent.height = WINDOW_HEIGHT;
renderPassInfo.renderArea.extent.width = swapChainExtent.width;
renderPassInfo.renderArea.extent.height = swapChainExtent.height;
renderPassInfo.clearValueCount = 1;
renderPassInfo.pClearValues = &clearValue;
VkViewport viewport = { 0 };
viewport.height = (float)WINDOW_HEIGHT;
viewport.width = (float)WINDOW_WIDTH;
// The viewport covers the whole swapchain image: height comes from the
// extent's height and width from the extent's width.
viewport.height = (float)swapChainExtent.height;
viewport.width = (float)swapChainExtent.width;
viewport.minDepth = (float)0.0f;
viewport.maxDepth = (float)1.0f;
VkRect2D scissor = { 0 };
scissor.extent.width = WINDOW_WIDTH;
scissor.extent.height = WINDOW_HEIGHT;
scissor.extent.width = swapChainExtent.width;
scissor.extent.height = swapChainExtent.height;
scissor.offset.x = 0;
scissor.offset.y = 0;
@ -815,8 +816,8 @@ void CreateFramebuffer()
fbCreateInfo.renderPass = renderPass;
fbCreateInfo.attachmentCount = 1;
fbCreateInfo.pAttachments = &views[i];
fbCreateInfo.width = WINDOW_WIDTH;
fbCreateInfo.height = WINDOW_HEIGHT;
fbCreateInfo.width = swapChainExtent.width;
fbCreateInfo.height = swapChainExtent.height;
fbCreateInfo.layers = 1;
res = vkCreateFramebuffer(device, &fbCreateInfo, NULL, &fbs[i]);
@ -944,8 +945,8 @@ void CreatePipeline()
VkViewport vp = {};
vp.x = 0.0f;
vp.y = 0.0f;
vp.width = (float)WINDOW_WIDTH;
vp.height = (float)WINDOW_HEIGHT;
vp.width = (float)swapChainExtent.width;
vp.height = (float)swapChainExtent.height;
vp.minDepth = 0.0f;
vp.maxDepth = 1.0f;