1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2024-12-04 16:24:15 +01:00
rpi-vk-driver/driver/draw.c
2020-04-20 13:45:17 +01:00

497 lines
18 KiB
C

#include "common.h"
#include "kernel/vc4_packet.h"
//returns max index
static uint32_t drawCommon(VkCommandBuffer commandBuffer)
{
assert(commandBuffer);
_commandBuffer* cb = commandBuffer;
//TODO handle cases when submitting >65k vertices in a VBO
//TODO HW-2116 workaround
//TODO GFXH-515 / SW-5891 workaround
//TODO make this as lightweight as possible to make sure
//as many drawcalls can be submitted as possible
//uint32_t vertexBufferDirty;
//uint32_t indexBufferDirty;
///uint32_t viewportDirty;
///uint32_t lineWidthDirty;
///uint32_t depthBiasDirty;
///uint32_t depthBoundsDirty;
//uint32_t graphicsPipelineDirty;
//uint32_t computePipelineDirty;
//uint32_t subpassDirty;
//uint32_t blendConstantsDirty;
//uint32_t scissorDirty;
//uint32_t stencilCompareMaskDirty;
//uint32_t stencilWriteMaskDirty;
//uint32_t stencilReferenceDirty;
//uint32_t descriptorSetDirty;
//uint32_t pushConstantDirty;
//TODO multiple viewports
VkViewport vp;
vp = cb->graphicsPipeline->viewports[0];
for(uint32_t c = 0; c < cb->graphicsPipeline->dynamicStateCount; ++c)
{
if(cb->graphicsPipeline->dynamicStates[c] == VK_DYNAMIC_STATE_VIEWPORT)
{
vp = cb->viewport;
}
}
//if(cb->lineWidthDirty)
{
//Line width
clFit(commandBuffer, &commandBuffer->binCl, V3D21_LINE_WIDTH_length);
clInsertLineWidth(&commandBuffer->binCl, cb->graphicsPipeline->lineWidth);
cb->lineWidthDirty = 0;
}
//if(cb->viewportDirty)
{
//Clip Window
clFit(commandBuffer, &commandBuffer->binCl, V3D21_CLIP_WINDOW_length);
clInsertClipWindow(&commandBuffer->binCl,
vp.width,
vp.height,
vp.y, //bottom pixel coord
vp.x); //left pixel coord
//TODO why flipped???
//Clipper XY Scaling
clFit(commandBuffer, &commandBuffer->binCl, V3D21_CLIPPER_XY_SCALING_length);
clInsertClipperXYScaling(&commandBuffer->binCl, (float)(vp.width) * 0.5f * 16.0f, -1.0f * (float)(vp.height) * 0.5f * 16.0f);
//Viewport Offset
clFit(commandBuffer, &commandBuffer->binCl, V3D21_VIEWPORT_OFFSET_length);
clInsertViewPortOffset(&commandBuffer->binCl, vp.width * 0.5f, vp.height * 0.5f);
cb->viewportDirty = 0;
}
//if(cb->depthBiasDirty || cb->depthBoundsDirty)
{
//Configuration Bits
clFit(commandBuffer, &commandBuffer->binCl, V3D21_CONFIGURATION_BITS_length);
clInsertConfigurationBits(&commandBuffer->binCl,
1, //earlyz updates enable
cb->graphicsPipeline->depthTestEnable, //earlyz enable
cb->graphicsPipeline->depthWriteEnable && cb->graphicsPipeline->depthTestEnable, //z updates enable
cb->graphicsPipeline->depthTestEnable ? getCompareOp(cb->graphicsPipeline->depthCompareOp) : V3D_COMPARE_FUNC_ALWAYS, //depth compare func
0, //coverage read mode
0, //coverage pipe select
0, //coverage update mode
0, //coverage read type
cb->graphicsPipeline->rasterizationSamples > 1, //rasterizer oversample mode
cb->graphicsPipeline->depthBiasEnable, //depth offset enable
cb->graphicsPipeline->frontFace == VK_FRONT_FACE_CLOCKWISE, //clockwise
!(cb->graphicsPipeline->cullMode & VK_CULL_MODE_BACK_BIT), //enable back facing primitives
!(cb->graphicsPipeline->cullMode & VK_CULL_MODE_FRONT_BIT)); //enable front facing primitives
//TODO Depth Offset
clFit(commandBuffer, &commandBuffer->binCl, V3D21_DEPTH_OFFSET_length);
clInsertDepthOffset(&commandBuffer->binCl, cb->graphicsPipeline->depthBiasConstantFactor, cb->graphicsPipeline->depthBiasSlopeFactor);
//TODO how is this calculated?
//it's Zc to Zs scale and bias
//seems to go from -1.0 .. 1.0 to 0.0 .. 1.0
//eg. x * 0.5 + 0.5
//cb->graphicsPipeline->minDepthBounds;
//Clipper Z Scale and Offset
clFit(commandBuffer, &commandBuffer->binCl, V3D21_CLIPPER_Z_SCALE_AND_OFFSET_length);
clInsertClipperZScaleOffset(&commandBuffer->binCl, 0.5f, 0.5f);
cb->vertexBufferDirty = 0;
cb->depthBoundsDirty = 0;
}
//Point size
clFit(commandBuffer, &commandBuffer->binCl, V3D21_POINT_SIZE_length);
clInsertPointSize(&commandBuffer->binCl, 1.0f);
//TODO?
//Flat Shade Flags
clFit(commandBuffer, &commandBuffer->binCl, V3D21_FLAT_SHADE_FLAGS_length);
clInsertFlatShadeFlags(&commandBuffer->binCl, 0);
//GL Shader State
clFit(commandBuffer, &commandBuffer->binCl, V3D21_GL_SHADER_STATE_length);
clInsertShaderState(&commandBuffer->binCl,
0, //shader state record address
0, //extended shader state record
cb->graphicsPipeline->vertexAttributeDescriptionCount & 0x7); //number of attribute arrays, 0 -> 8
//emit shader record
ControlListAddress fragCode = {
.handle = ((_shaderModule*)(cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_FRAGMENT_BIT)]))->bos[RPI_ASSEMBLY_TYPE_FRAGMENT],
.offset = 0,
};
ControlListAddress vertCode = {
.handle = ((_shaderModule*)(cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_VERTEX_BIT)]))->bos[RPI_ASSEMBLY_TYPE_VERTEX],
.offset = 0,
};
ControlListAddress coordCode = {
.handle = ((_shaderModule*)(cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_VERTEX_BIT)]))->bos[RPI_ASSEMBLY_TYPE_COORDINATE],
.offset = 0,
};
//TODO
commandBuffer->shaderRecCount++;
clFit(commandBuffer, &commandBuffer->shaderRecCl, V3D21_SHADER_RECORD_length);
ControlList relocCl = commandBuffer->shaderRecCl;
uint32_t attribCount = 0;
uint32_t attribSelectBits = 0;
for(uint32_t c = 0 ; c < cb->graphicsPipeline->vertexAttributeDescriptionCount; ++c)
{
if(cb->vertexBuffers[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding])
{
attribCount++;
attribSelectBits |= 1 << cb->graphicsPipeline->vertexAttributeDescriptions[c].location;
}
}
uint32_t attribSize = 0;
for(uint32_t c = 0; c < cb->graphicsPipeline->vertexAttributeDescriptionCount; ++c)
{
attribSize += getFormatByteSize(cb->graphicsPipeline->vertexAttributeDescriptions[c].format);
}
//number of attribs
//3 is the number of type of possible shaders
for(int c = 0; c < (3 + attribCount)*4; ++c)
{
clInsertNop(&commandBuffer->shaderRecCl);
}
clInsertShaderRecord(&commandBuffer->shaderRecCl,
&relocCl,
&commandBuffer->handlesCl,
cb->binCl.currMarker->handlesBuf,
cb->binCl.currMarker->handlesSize,
!cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_FRAGMENT_BIT)]->hasThreadSwitch,
0, //TODO point size included in shaded vertex data?
1, //enable clipping
0, //TODO fragment number of used uniforms?
cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_FRAGMENT_BIT)]->numVaryings, //fragment number of varyings
0, //fragment uniform address?
fragCode, //fragment code address
0, //TODO vertex number of used uniforms?
attribSelectBits, //vertex attribute array select bits
attribSize, //vertex total attribute size
0, //vertex uniform address
vertCode, //vertex shader code address
0, //TODO coordinate number of used uniforms?
//TODO how do we know which attribute contains the vertices?
//for now the first one will be hardcoded to have the vertices...
1 << 0, //coordinate attribute array select bits
getFormatByteSize(cb->graphicsPipeline->vertexAttributeDescriptions[0].format), //coordinate total attribute size
0, //coordinate uniform address
coordCode //coordinate shader code address
);
uint32_t maxIndex = 0xffff;
for(uint32_t c = 0 ; c < cb->graphicsPipeline->vertexAttributeDescriptionCount; ++c)
{
if(cb->vertexBuffers[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding])
{
uint32_t formatByteSize = getFormatByteSize(cb->graphicsPipeline->vertexAttributeDescriptions[c].format);
//TODO the indexing here is incorrect....
uint32_t stride = cb->graphicsPipeline->vertexBindingDescriptions[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding].stride;
if(stride > 0)
{
//TODO offset
uint32_t usedIndices = (cb->vertexBuffers[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding]->boundMem->size - formatByteSize) / stride;
if(usedIndices < maxIndex)
{
maxIndex = usedIndices;
}
}
ControlListAddress vertexBuffer = {
.handle = cb->vertexBuffers[cb->graphicsPipeline->vertexAttributeDescriptions[c].binding]->boundMem->bo,
.offset = cb->graphicsPipeline->vertexAttributeDescriptions[c].offset,
};
clFit(commandBuffer, &commandBuffer->shaderRecCl, V3D21_ATTRIBUTE_RECORD_length);
clInsertAttributeRecord(&commandBuffer->shaderRecCl,
&relocCl,
&commandBuffer->handlesCl,
cb->binCl.currMarker->handlesBuf,
cb->binCl.currMarker->handlesSize,
vertexBuffer, //reloc address
formatByteSize,
stride,
cb->graphicsPipeline->vertexAttributeDescriptions[c].offset, //vertex vpm offset
cb->graphicsPipeline->vertexAttributeDescriptions[c].offset //coordinte vpm offset
);
}
}
//write uniforms
_pipelineLayout* pl = cb->graphicsPipeline->layout;
//kernel side expects relocations first!
for(uint32_t c = 0; c < cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_FRAGMENT_BIT)]->numMappings; ++c)
{
VkRpiAssemblyMappingEXT mapping = cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_FRAGMENT_BIT)]->mappings[c];
if(mapping.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT)
{
if(mapping.mappingType == VK_RPI_ASSEMBLY_MAPPING_TYPE_DESCRIPTOR)
{
if(mapping.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
mapping.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
mapping.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
{
_descriptorSet* ds = getMapElement(pl->descriptorSetBindingMap, mapping.descriptorSet);
_descriptorImage* di = getMapElement(ds->imageBindingMap, mapping.descriptorBinding);
di += mapping.descriptorArrayElement;
//emit reloc for texture BO
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, di->imageView->image->boundMem->bo);
//emit tex bo reloc index
clFit(commandBuffer, &commandBuffer->uniformsCl, 4);
clInsertData(&commandBuffer->uniformsCl, 4, &idx);
}
else if(mapping.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
mapping.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
mapping.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
mapping.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
{
_descriptorSet* ds = getMapElement(pl->descriptorSetBindingMap, mapping.descriptorSet);
_descriptorBuffer* db = getMapElement(ds->bufferBindingMap, mapping.descriptorBinding);
db += mapping.descriptorArrayElement;
//emit reloc for BO
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, db->buffer->boundMem->bo);
//emit bo reloc index
clFit(commandBuffer, &commandBuffer->uniformsCl, 4);
clInsertData(&commandBuffer->uniformsCl, 4, &idx);
}
else if(mapping.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ||
mapping.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER)
{
_descriptorSet* ds = getMapElement(pl->descriptorSetBindingMap, mapping.descriptorSet);
_descriptorTexelBuffer* dtb = getMapElement(ds->texelBufferBindingMap, mapping.descriptorBinding);
dtb += mapping.descriptorArrayElement;
//emit reloc for BO
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, dtb->bufferView->buffer->boundMem->bo);
//emit bo reloc index
clFit(commandBuffer, &commandBuffer->uniformsCl, 4);
clInsertData(&commandBuffer->uniformsCl, 4, &idx);
}
else
{
assert(0); //shouldn't happen
}
}
}
}
//after relocs we can proceed with the usual uniforms
for(uint32_t c = 0; c < cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_FRAGMENT_BIT)]->numMappings; ++c)
{
VkRpiAssemblyMappingEXT mapping = cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_FRAGMENT_BIT)]->mappings[c];
if(mapping.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT)
{
if(mapping.mappingType == VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT)
{
clFit(commandBuffer, &commandBuffer->uniformsCl, 4);
clInsertData(&commandBuffer->uniformsCl, 4, cb->pushConstantBufferPixel + mapping.resourceOffset);
}
else if(mapping.mappingType == VK_RPI_ASSEMBLY_MAPPING_TYPE_DESCRIPTOR)
{
if(mapping.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
mapping.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
mapping.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
{
_descriptorSet* ds = getMapElement(pl->descriptorSetBindingMap, mapping.descriptorSet);
_descriptorImage* di = getMapElement(ds->imageBindingMap, mapping.descriptorBinding);
di += mapping.descriptorArrayElement;
uint32_t cubemapStride = (di->imageView->image->width * di->imageView->image->height * getFormatBpp(di->imageView->interpretedFormat)) >> 3;
fprintf(stderr, "cubemap stride %i\n", cubemapStride);
//TODO handle miplevels according to subresource rage?
uint32_t params[4];
encodeTextureUniform(params,
di->imageView->subresourceRange.levelCount - 1,
getTextureDataType(di->imageView->interpretedFormat),
di->imageView->viewType == VK_IMAGE_VIEW_TYPE_CUBE,
cubemapStride >> 12, //TODO cubemap stride in multiples of 4KB
di->imageView->image->levelOffsets[0] >> 12, //Image level 0 offset in multiples of 4KB
di->imageView->image->height & 2047,
di->imageView->image->width & 2047,
getMinFilterType(di->sampler->minFilter, di->sampler->mipmapMode),// di->sampler->maxLod),
di->sampler->magFilter == VK_FILTER_NEAREST,
getWrapMode(di->sampler->addressModeU),
getWrapMode(di->sampler->addressModeV),
di->sampler->disableAutoLod
);
uint32_t size = 0;
if(di->imageView->viewType == VK_IMAGE_VIEW_TYPE_1D)
{
size = 4;
}
else if(di->imageView->viewType == VK_IMAGE_VIEW_TYPE_2D)
{
size = 8;
}
else if(di->imageView->viewType == VK_IMAGE_VIEW_TYPE_CUBE)
{
size = 12;
}
else
{
assert(0); //unsupported
}
//TODO handle this properly
//TMU0_B requires an extra uniform written
//we need to signal that somehow from API side
//if mode is cubemap we don't need an extra uniform, it's included!
if(di->imageView->viewType != VK_IMAGE_VIEW_TYPE_CUBE && di->sampler->disableAutoLod)
{
size += 4;
}
//emit tex parameters
clFit(commandBuffer, &commandBuffer->uniformsCl, size);
clInsertData(&commandBuffer->uniformsCl, size, params);
}
}
}
}
//do it twice for vertex and then coordinate
for(uint32_t d = 0; d < 2; ++d)
{
for(uint32_t c = 0; c < cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_VERTEX_BIT)]->numMappings; ++c)
{
VkRpiAssemblyMappingEXT mapping = cb->graphicsPipeline->modules[ulog2(VK_SHADER_STAGE_VERTEX_BIT)]->mappings[c];
if(mapping.shaderStage & VK_SHADER_STAGE_VERTEX_BIT)
{
if(mapping.mappingType == VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT)
{
clFit(commandBuffer, &commandBuffer->uniformsCl, 4);
clInsertData(&commandBuffer->uniformsCl, 4, cb->pushConstantBufferVertex + mapping.resourceOffset);
}
else if(mapping.mappingType == VK_RPI_ASSEMBLY_MAPPING_TYPE_DESCRIPTOR)
{
}
else
{
assert(0); //shouldn't happen
}
}
}
}
return maxIndex;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdDraw
*/
void rpi_vkCmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance)
{
assert(commandBuffer);
if(instanceCount != 1 || firstInstance != 0)
{
unsigned instancing;
UNSUPPORTED(instancing);
}
drawCommon(commandBuffer);
_commandBuffer* cb = commandBuffer;
//Submit draw call: vertex Array Primitives
clFit(commandBuffer, &commandBuffer->binCl, V3D21_VERTEX_ARRAY_PRIMITIVES_length);
clInsertVertexArrayPrimitives(&commandBuffer->binCl, firstVertex, vertexCount, getPrimitiveMode(cb->graphicsPipeline->topology));
cb->numDrawCallsSubmitted++;
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdDrawIndexed(
VkCommandBuffer commandBuffer,
uint32_t indexCount,
uint32_t instanceCount,
uint32_t firstIndex,
int32_t vertexOffset,
uint32_t firstInstance)
{
assert(commandBuffer);
if(instanceCount != 1 || firstInstance != 0)
{
unsigned instancing;
UNSUPPORTED(instancing);
}
uint32_t maxIndex = drawCommon(commandBuffer);
_commandBuffer* cb = commandBuffer;
clFit(commandBuffer, &commandBuffer->handlesCl, 4);
uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, cb->binCl.currMarker->handlesBuf, cb->binCl.currMarker->handlesSize, cb->indexBuffer->boundMem->bo);
clInsertGEMRelocations(&commandBuffer->binCl, idx, 0);
//Submit draw call: vertex Array Primitives
clFit(commandBuffer, &commandBuffer->binCl, V3D21_VERTEX_ARRAY_PRIMITIVES_length);
clInsertIndexedPrimitiveList(&commandBuffer->binCl,
maxIndex, //max index
0, //TODO we can pass an offset here
indexCount,
1, //we only support 16 bit indices
getPrimitiveMode(cb->graphicsPipeline->topology));
cb->numDrawCallsSubmitted++;
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdDrawIndexedIndirect(
VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
UNSUPPORTED(vkCmdDrawIndexedIndirect);
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdDrawIndirect(
VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride)
{
UNSUPPORTED(vkCmdDrawIndirect);
}