1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-02-19 16:54:18 +01:00

implemented depth/stencil shader patching

This commit is contained in:
Unknown 2019-09-29 19:06:26 +01:00
parent e529b69adc
commit ab337a9212
9 changed files with 253 additions and 129 deletions

View File

@ -1,11 +1,96 @@
#pragma once
#include <stdint.h>
#include "vc4_qpu_defines.h"
#ifdef __cplusplus
extern "C" {
#endif
//Builds one QPU ALU instruction (an add op plus a mul op) from its individual
//fields and returns it as a single 64-bit instruction word.
//NOTE(review): the qpu_* types are presumably declared in vc4_qpu_defines.h;
//valid ranges of the uint8_t fields are not visible from this header.
uint64_t encode_alu(qpu_sig_bits sig_bits,
qpu_unpack unpack_mode,
//If the pm bit is set, the unpack field programs the r4 unpack unit,
//and the pack field is used to program the color
//conversion on the output of the mul unit
uint8_t pack_unpack_select,
uint8_t pack_mode,
qpu_cond add_cond, //condition for the add op
qpu_cond mul_cond, //condition for the mul op
uint8_t set_flags, //Flags are updated from the add ALU unless the add ALU performed a NOP (or its condition code was NEVER) in which case flags are updated from the mul ALU
uint8_t write_swap_flag, //0: add writes to A, mul to B, 1: add writes to B, mul to A
qpu_waddr waddr_add, //destination written by the add op
qpu_waddr waddr_mul, //destination written by the mul op
qpu_op_mul op_mul,
qpu_op_add op_add,
qpu_raddr raddr_a,
qpu_raddr raddr_b,
qpu_mux add_a, //first operand of the add op
qpu_mux add_b, //second operand of the add op
qpu_mux mul_a, //first operand of the mul op
qpu_mux mul_b //second operand of the mul op
);
//Variant of encode_alu that takes a small immediate (small_imm) where encode_alu
//takes raddr_b, and has no sig_bits parameter. Returns a 64-bit instruction word.
//NOTE(review): presumably the signal bits are implied by the small-immediate
//form -- confirm in the implementation.
uint64_t encode_alu_small_imm(qpu_unpack unpack_mode,
uint8_t pack_unpack_select,
uint8_t pack_mode,
qpu_cond add_cond,
qpu_cond mul_cond,
uint8_t set_flags, //Flags are updated from the add ALU unless the add ALU performed a NOP (or its condition code was NEVER) in which case flags are updated from the mul ALU
uint8_t write_swap_flag, //0: add writes to A, mul to B, 1: add writes to B, mul to A
qpu_waddr waddr_add,
qpu_waddr waddr_mul,
qpu_op_mul op_mul,
qpu_op_add op_add,
qpu_raddr raddr_a,
uint8_t small_imm, //NOTE(review): presumably an already-encoded small immediate value -- confirm
qpu_mux add_a,
qpu_mux add_b,
qpu_mux mul_a,
qpu_mux mul_b
);
//Builds a branch instruction from its fields and returns it as a 64-bit word.
//Going by the parameter comments, the branch target is imm, optionally plus
//raddr_a, and is optionally relative to PC+4.
uint64_t encode_branch(qpu_branch_cond branch_cond,
uint8_t is_relative, //if set branch target is relative to PC+4
uint8_t use_raddr_a, //if set add value of raddr_a (from simd elem 0) to branch target
qpu_raddr raddr_a,
uint8_t write_swap_bit,
qpu_waddr waddr_add,
qpu_waddr waddr_mul,
uint32_t imm //always added to branch target, set to 0 if unused
);
//Builds a semaphore instruction from its fields and returns it as a 64-bit word.
//Besides the semaphore selector/direction it carries an immediate that is
//loaded into all 16 SIMD elements (see imm_val).
uint64_t encode_semaphore(uint8_t pack_unpack_select,
uint8_t pack_mode,
qpu_cond cond_add,
qpu_cond cond_mul,
uint8_t set_flags,
uint8_t write_swap,
qpu_waddr waddr_add,
qpu_waddr waddr_mul,
uint8_t incr_sem, //if 1 increment semaphore
uint8_t sem, //4 bit semaphore selector
uint32_t imm_val //27bit immediate value loaded into all 16 simd elements
);
//Builds a load-immediate instruction (e.g. "r0 = load32.always(imm)") from its
//fields and returns it as a 64-bit instruction word.
uint64_t encode_load_imm(uint8_t pack_unpack_select,
uint8_t pack_mode,
qpu_cond cond_add,
qpu_cond cond_mul,
uint8_t set_flags,
uint8_t write_swap,
qpu_waddr waddr_add, //destination that receives the immediate on the add side
qpu_waddr waddr_mul,
uint32_t imm //2x16bit or 1x32bit uint
);
//Builds a per-element load-immediate instruction and returns it as a 64-bit
//word. Each of the two 16-bit masks supplies one bit per element
//(ms_bit = most significant / sign bit, ls_bit = least significant bit).
//NOTE(review): presumably bit i of each mask maps to SIMD element i -- confirm.
uint64_t encode_load_imm_per_elem(
uint8_t signed_or_unsigned, //1 for signed, 0 for unsigned
uint8_t pack_unpack_select,
uint8_t pack_mode,
qpu_cond cond_add,
qpu_cond cond_mul,
uint8_t set_flags,
uint8_t write_swap,
qpu_waddr waddr_add,
qpu_waddr waddr_mul,
uint16_t ms_bit, //per element MS (sign) bit
uint16_t ls_bit //per element LS bit
);
//Disassembles a single 64-bit QPU instruction.
//NOTE(review): presumably prints the textual form to stdout -- confirm.
void disassemble_qpu_asm(uint64_t instruction);
//Assembles the QPU assembly text in str into the instructions array.
//NOTE(review): str is non-const and callers pass a scratch copy, so it is
//presumably modified while parsing; instructions presumably must hold at least
//get_num_instructions(str) entries -- confirm against the implementation.
void assemble_qpu_asm(char* str, uint64_t* instructions);
//NOTE(review): presumably returns how many instructions the assembly text at
//ptr contains -- confirm.
unsigned get_num_instructions(char* ptr);

View File

@ -139,7 +139,7 @@ static const char *qpu_sig_bits_str[] = {
//Returns the small immediate value to be encoded in to the raddr b field if
//the argument can be represented as one, or ~0 otherwise.
//48: Small immediate value for rotate-by-r5, and 49-63 are "rotate by n channels"
uint8_t qpu_encode_small_immediate(uint32_t i)
static uint8_t qpu_encode_small_immediate(uint32_t i)
{
if (i <= 15)
return i;

View File

@ -657,11 +657,16 @@ void encodeTextureUniform(uint32_t* params, //array of 4 uint32_t
params[3] = 0;
}
void encodeDepthStencilValue(uint32_t *values, uint32_t* numValues, VkStencilOpState front, VkStencilOpState back)
void encodeStencilValue(uint32_t *values, uint32_t* numValues, VkStencilOpState front, VkStencilOpState back, uint8_t stencilTestEnable)
{
assert(values);
assert(numValues);
if(!stencilTestEnable)
{
front.compareOp = back.compareOp = VK_COMPARE_OP_ALWAYS;
}
if(front.compareMask == back.compareMask &&
front.compareOp == back.compareOp &&
front.depthFailOp == back.depthFailOp &&
@ -676,10 +681,10 @@ void encodeDepthStencilValue(uint32_t *values, uint32_t* numValues, VkStencilOpS
values[0] = 0
| (front.compareMask & 0xff)
| (front.reference & 0xff) << 0x8
| (getCompareOp(front.compareOp) & 0x3) << 16
| (getStencilOp(front.failOp) & 0x3) << 19
| (getStencilOp(front.passOp) & 0x3) << 22
| (getStencilOp(front.depthFailOp) & 0x3) << 25
| (getCompareOp(front.compareOp) & 0x7) << 16
| (getStencilOp(front.failOp) & 0x7) << 19
| (getStencilOp(front.passOp) & 0x7) << 22
| (getStencilOp(front.depthFailOp) & 0x7) << 25
| 3 << 30; //front and back
switch(front.writeMask)
@ -711,19 +716,19 @@ void encodeDepthStencilValue(uint32_t *values, uint32_t* numValues, VkStencilOpS
values[0] = 0
| (front.compareMask & 0xff)
| (front.reference & 0xff) << 0x8
| (getCompareOp(front.compareOp) & 0x3) << 16
| (getStencilOp(front.failOp) & 0x3) << 19
| (getStencilOp(front.passOp) & 0x3) << 22
| (getStencilOp(front.depthFailOp) & 0x3) << 25
| (getCompareOp(front.compareOp) & 0x7) << 16
| (getStencilOp(front.failOp) & 0x7) << 19
| (getStencilOp(front.passOp) & 0x7) << 22
| (getStencilOp(front.depthFailOp) & 0x7) << 25
| 1 << 30; //front
values[1] = 0
| (back.compareMask & 0xff)
| (back.reference & 0xff) << 0x8
| (getCompareOp(back.compareOp) & 0x3) << 16
| (getStencilOp(back.failOp) & 0x3) << 19
| (getStencilOp(back.passOp) & 0x3) << 22
| (getStencilOp(back.depthFailOp) & 0x3) << 25
| (getCompareOp(back.compareOp) & 0x7) << 16
| (getStencilOp(back.failOp) & 0x7) << 19
| (getStencilOp(back.passOp) & 0x7) << 22
| (getStencilOp(back.depthFailOp) & 0x7) << 25
| 2 << 30; //back
if((front.writeMask == 0x1 ||

View File

@ -217,6 +217,7 @@ typedef struct VkShaderModule_T
{
uint32_t bos[RPI_ASSEMBLY_TYPE_MAX];
uint32_t sizes[RPI_ASSEMBLY_TYPE_MAX];
uint64_t* instructions[RPI_ASSEMBLY_TYPE_MAX];
VkRpiAssemblyMappingEXT* mappings;
uint32_t numMappings;
uint32_t hasThreadSwitch;
@ -480,7 +481,7 @@ void encodeTextureUniform(uint32_t* params,
uint8_t wrapT,
uint8_t wrapS,
uint8_t noAutoLod);
void encodeDepthStencilValue(uint32_t* values, uint32_t* numValues, VkStencilOpState front, VkStencilOpState back);
void encodeStencilValue(uint32_t* values, uint32_t* numValues, VkStencilOpState front, VkStencilOpState back, uint8_t stencilTestEnable);
uint8_t getTextureDataType(VkFormat format);
uint8_t getMinFilterType(VkFilter minFilter, VkSamplerMipmapMode mipFilter, float maxLod);
uint8_t getWrapMode(VkSamplerAddressMode mode);

View File

@ -1,6 +1,7 @@
#include "common.h"
#include "kernel/vc4_packet.h"
#include "../QPUassembler/qpu_assembler.h"
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdBindPipeline
@ -20,6 +21,46 @@ void vkCmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeli
}
}
//Prepends depth/stencil setup instructions to a fragment shader.
//
//Layout of the patched program:
//  [numValues x load32 of a stencil config word into r0, r1, ...]
//  [numValues x write of that accumulator to tlb_stencil_setup]
//  [one tlb_z write, only if depth write or stencil test is enabled]
//  [the original instructions, unchanged]
//
//instructions: in/out. Points at the shader's instruction array. On success the
//              old array is released with FREE and replaced by the patched one.
//size:         in/out. Size of the instruction array in bytes.
//dsi:          depth/stencil state the stencil config words are derived from.
//pAllocator:   not referenced by name in this body.
//              NOTE(review): presumably consumed implicitly by the ALLOCATE/FREE
//              macros -- confirm against their definition before renaming it.
//
//This function cannot report errors: if the allocation fails, *instructions and
//*size are left untouched and the shader simply stays unpatched.
void patchShaderDepthStencilBlending(uint64_t** instructions, uint32_t* size, const VkPipelineDepthStencilStateCreateInfo* dsi, const VkAllocationCallbacks* pAllocator)
{
	assert(instructions);
	assert(size);
	assert(dsi);

	//a single tlb_z write is needed whenever depth write or stencil test is on
	const uint32_t needsZWrite = (dsi->depthWriteEnable || dsi->stencilTestEnable) ? 1 : 0;

	uint32_t values[3];
	uint32_t numValues = 0; //stays 0 if the encoder does not produce anything
	encodeStencilValue(values, &numValues, dsi->front, dsi->back, dsi->stencilTestEnable);
	assert(numValues <= sizeof(values) / sizeof(values[0]));

	//every stencil word costs a load32 plus a tlb_stencil_setup write
	const uint32_t numExtraInstructions = needsZWrite + numValues * 2;
	const uint32_t newSize = *size + numExtraInstructions * sizeof(uint64_t);

	uint64_t* tmp = ALLOCATE(newSize, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if(!tmp)
	{
		//out of memory: keep the original, unpatched shader
		return;
	}

	memset(tmp, 0, newSize);
	//original program goes after the prologue
	memcpy(tmp + numExtraInstructions, *instructions, *size);

	///"sig_load_imm ; r0 = load32.always(0xF497EEFF) ; nop = load32() ;" //stencil setup state
	///"sig_none ; tlb_stencil_setup = or.always(r0, r0) ; nop = nop(r0, r0) ;"
	for(uint32_t c = 0; c < numValues; ++c)
	{
		//word c is loaded into accumulator r<c> (waddr 32 + c) ...
		tmp[c] = encode_load_imm(0, 0, 1, 0, 0, 0, 32 + c, 39, values[c]); //r0 = load32.always(values[c])
		//... and then moved from r<c> (mux c) to tlb_stencil_setup
		tmp[numValues + c] = encode_alu(1, 0, 0, 0, 1, 0, 0, 0, 43, 39, 0, 21, 0, 0, c, c, 0, 0); //tlb_stencil_setup = or.always(r0, r0)
	}

	///"sig_none ; tlb_z = or.always(b, b, nop, rb15) ; nop = nop(r0, r0) ;"
	if(needsZWrite)
	{
		tmp[numValues*2] = encode_alu(1, 0, 0, 0, 1, 0, 0, 0, 44, 39, 0, 21, 0, 15, 7, 7, 0, 0);
	}

	//replace instructions pointer
	FREE(*instructions);
	*instructions = tmp;
	*size = newSize;
}
/*
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateGraphicsPipelines
*/
@ -47,122 +88,165 @@ VkResult vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCach
}
memset(pip->names, 0, sizeof(char*)*6);
memset(pip->modules, 0, sizeof(_shaderModule*)*6);
for(int d = 0; d < pCreateInfos->stageCount; ++d)
for(int d = 0; d < pCreateInfos[c].stageCount; ++d)
{
uint32_t idx = ulog2(pCreateInfos->pStages[d].stage);
pip->modules[idx] = pCreateInfos->pStages[d].module;
uint32_t idx = ulog2(pCreateInfos[c].pStages[d].stage);
pip->modules[idx] = pCreateInfos[c].pStages[d].module;
pip->names[idx] = ALLOCATE(strlen(pCreateInfos->pStages[d].pName)+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
_shaderModule* s = pip->modules[idx];
pip->names[idx] = ALLOCATE(strlen(pCreateInfos[c].pStages[d].pName)+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!pip->names[idx])
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(pip->names[idx], pCreateInfos->pStages[d].pName, strlen(pCreateInfos->pStages[d].pName)+1);
memcpy(pip->names[idx], pCreateInfos[c].pStages[d].pName, strlen(pCreateInfos[c].pStages[d].pName)+1);
//patch fragment shader
if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_FRAGMENT_BIT)
{
patchShaderDepthStencilBlending(&s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT], pCreateInfos[c].pDepthStencilState, pAllocator);
//TODO if debug...
for(uint64_t e = 0; e < s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT] / 8; ++e)
{
printf("%#llx ", s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT][e]);
disassemble_qpu_asm(s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT][e]);
}
printf("\n");
s->bos[RPI_ASSEMBLY_TYPE_FRAGMENT] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT]);
}
if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_VERTEX_BIT)
{
//TODO if debug...
for(uint64_t e = 0; e < s->sizes[RPI_ASSEMBLY_TYPE_VERTEX] / 8; ++e)
{
printf("%#llx ", s->instructions[RPI_ASSEMBLY_TYPE_VERTEX][e]);
disassemble_qpu_asm(s->instructions[RPI_ASSEMBLY_TYPE_VERTEX][e]);
}
printf("\n");
for(uint64_t e = 0; e < s->sizes[RPI_ASSEMBLY_TYPE_COORDINATE] / 8; ++e)
{
printf("%#llx ", s->instructions[RPI_ASSEMBLY_TYPE_COORDINATE][e]);
disassemble_qpu_asm(s->instructions[RPI_ASSEMBLY_TYPE_COORDINATE][e]);
}
printf("\n");
s->bos[RPI_ASSEMBLY_TYPE_COORDINATE] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_COORDINATE], &s->sizes[RPI_ASSEMBLY_TYPE_COORDINATE]);
s->bos[RPI_ASSEMBLY_TYPE_VERTEX] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_VERTEX], &s->sizes[RPI_ASSEMBLY_TYPE_VERTEX]);
}
}
pip->vertexAttributeDescriptionCount = pCreateInfos->pVertexInputState->vertexAttributeDescriptionCount;
pip->vertexAttributeDescriptionCount = pCreateInfos[c].pVertexInputState->vertexAttributeDescriptionCount;
pip->vertexAttributeDescriptions = ALLOCATE(sizeof(VkVertexInputAttributeDescription) * pip->vertexAttributeDescriptionCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!pip->vertexAttributeDescriptions)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(pip->vertexAttributeDescriptions, pCreateInfos->pVertexInputState->pVertexAttributeDescriptions, sizeof(VkVertexInputAttributeDescription) * pip->vertexAttributeDescriptionCount);
memcpy(pip->vertexAttributeDescriptions, pCreateInfos[c].pVertexInputState->pVertexAttributeDescriptions, sizeof(VkVertexInputAttributeDescription) * pip->vertexAttributeDescriptionCount);
pip->vertexBindingDescriptionCount = pCreateInfos->pVertexInputState->vertexBindingDescriptionCount;
pip->vertexBindingDescriptionCount = pCreateInfos[c].pVertexInputState->vertexBindingDescriptionCount;
pip->vertexBindingDescriptions = ALLOCATE(sizeof(VkVertexInputBindingDescription) * pip->vertexBindingDescriptionCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!pip->vertexBindingDescriptions)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(pip->vertexBindingDescriptions, pCreateInfos->pVertexInputState->pVertexBindingDescriptions, sizeof(VkVertexInputBindingDescription) * pip->vertexBindingDescriptionCount);
memcpy(pip->vertexBindingDescriptions, pCreateInfos[c].pVertexInputState->pVertexBindingDescriptions, sizeof(VkVertexInputBindingDescription) * pip->vertexBindingDescriptionCount);
pip->topology = pCreateInfos->pInputAssemblyState->topology;
pip->primitiveRestartEnable = pCreateInfos->pInputAssemblyState->primitiveRestartEnable;
pip->topology = pCreateInfos[c].pInputAssemblyState->topology;
pip->primitiveRestartEnable = pCreateInfos[c].pInputAssemblyState->primitiveRestartEnable;
//tessellation ignored
pip->viewportCount = pCreateInfos->pViewportState->viewportCount;
pip->viewportCount = pCreateInfos[c].pViewportState->viewportCount;
pip->viewports = ALLOCATE(sizeof(VkViewport) * pip->viewportCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!pip->viewports)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(pip->viewports, pCreateInfos->pViewportState->pViewports, sizeof(VkViewport) * pip->viewportCount);
memcpy(pip->viewports, pCreateInfos[c].pViewportState->pViewports, sizeof(VkViewport) * pip->viewportCount);
pip->scissorCount = pCreateInfos->pViewportState->scissorCount;
pip->scissorCount = pCreateInfos[c].pViewportState->scissorCount;
pip->scissors = ALLOCATE(sizeof(VkRect2D) * pip->viewportCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!pip->scissors)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(pip->scissors, pCreateInfos->pViewportState->pScissors, sizeof(VkRect2D) * pip->scissorCount);
memcpy(pip->scissors, pCreateInfos[c].pViewportState->pScissors, sizeof(VkRect2D) * pip->scissorCount);
pip->depthClampEnable = pCreateInfos->pRasterizationState->depthClampEnable;
pip->rasterizerDiscardEnable = pCreateInfos->pRasterizationState->rasterizerDiscardEnable;
pip->polygonMode = pCreateInfos->pRasterizationState->polygonMode;
pip->cullMode = pCreateInfos->pRasterizationState->cullMode;
pip->frontFace = pCreateInfos->pRasterizationState->frontFace;
pip->depthBiasEnable = pCreateInfos->pRasterizationState->depthBiasEnable;
pip->depthBiasConstantFactor = pCreateInfos->pRasterizationState->depthBiasConstantFactor;
pip->depthBiasClamp = pCreateInfos->pRasterizationState->depthBiasClamp;
pip->depthBiasSlopeFactor = pCreateInfos->pRasterizationState->depthBiasSlopeFactor;
pip->lineWidth = pCreateInfos->pRasterizationState->lineWidth;
pip->depthClampEnable = pCreateInfos[c].pRasterizationState->depthClampEnable;
pip->rasterizerDiscardEnable = pCreateInfos[c].pRasterizationState->rasterizerDiscardEnable;
pip->polygonMode = pCreateInfos[c].pRasterizationState->polygonMode;
pip->cullMode = pCreateInfos[c].pRasterizationState->cullMode;
pip->frontFace = pCreateInfos[c].pRasterizationState->frontFace;
pip->depthBiasEnable = pCreateInfos[c].pRasterizationState->depthBiasEnable;
pip->depthBiasConstantFactor = pCreateInfos[c].pRasterizationState->depthBiasConstantFactor;
pip->depthBiasClamp = pCreateInfos[c].pRasterizationState->depthBiasClamp;
pip->depthBiasSlopeFactor = pCreateInfos[c].pRasterizationState->depthBiasSlopeFactor;
pip->lineWidth = pCreateInfos[c].pRasterizationState->lineWidth;
pip->rasterizationSamples = pCreateInfos->pMultisampleState->rasterizationSamples;
pip->sampleShadingEnable = pCreateInfos->pMultisampleState->sampleShadingEnable;
pip->minSampleShading = pCreateInfos->pMultisampleState->minSampleShading;
if(pCreateInfos->pMultisampleState->pSampleMask)
pip->rasterizationSamples = pCreateInfos[c].pMultisampleState->rasterizationSamples;
pip->sampleShadingEnable = pCreateInfos[c].pMultisampleState->sampleShadingEnable;
pip->minSampleShading = pCreateInfos[c].pMultisampleState->minSampleShading;
if(pCreateInfos[c].pMultisampleState->pSampleMask)
{
pip->sampleMask = *pCreateInfos->pMultisampleState->pSampleMask;
pip->sampleMask = *pCreateInfos[c].pMultisampleState->pSampleMask;
}
else
{
pip->sampleMask = 0;
}
pip->alphaToCoverageEnable = pCreateInfos->pMultisampleState->alphaToCoverageEnable;
pip->alphaToOneEnable = pCreateInfos->pMultisampleState->alphaToOneEnable;
pip->alphaToCoverageEnable = pCreateInfos[c].pMultisampleState->alphaToCoverageEnable;
pip->alphaToOneEnable = pCreateInfos[c].pMultisampleState->alphaToOneEnable;
pip->depthTestEnable = pCreateInfos->pDepthStencilState->depthTestEnable;
pip->depthWriteEnable = pCreateInfos->pDepthStencilState->depthWriteEnable;
pip->depthCompareOp = pCreateInfos->pDepthStencilState->depthCompareOp;
pip->depthBoundsTestEnable = pCreateInfos->pDepthStencilState->depthBoundsTestEnable;
pip->stencilTestEnable = pCreateInfos->pDepthStencilState->stencilTestEnable;
pip->front = pCreateInfos->pDepthStencilState->front;
pip->back = pCreateInfos->pDepthStencilState->back;
pip->minDepthBounds = pCreateInfos->pDepthStencilState->minDepthBounds;
pip->maxDepthBounds = pCreateInfos->pDepthStencilState->maxDepthBounds;
pip->depthTestEnable = pCreateInfos[c].pDepthStencilState->depthTestEnable;
pip->depthWriteEnable = pCreateInfos[c].pDepthStencilState->depthWriteEnable;
pip->depthCompareOp = pCreateInfos[c].pDepthStencilState->depthCompareOp;
pip->depthBoundsTestEnable = pCreateInfos[c].pDepthStencilState->depthBoundsTestEnable;
pip->stencilTestEnable = pCreateInfos[c].pDepthStencilState->stencilTestEnable;
pip->front = pCreateInfos[c].pDepthStencilState->front;
pip->back = pCreateInfos[c].pDepthStencilState->back;
pip->minDepthBounds = pCreateInfos[c].pDepthStencilState->minDepthBounds;
pip->maxDepthBounds = pCreateInfos[c].pDepthStencilState->maxDepthBounds;
pip->logicOpEnable = pCreateInfos->pColorBlendState->logicOpEnable;
pip->logicOp = pCreateInfos->pColorBlendState->logicOp;
pip->attachmentCount = pCreateInfos->pColorBlendState->attachmentCount;
pip->logicOpEnable = pCreateInfos[c].pColorBlendState->logicOpEnable;
pip->logicOp = pCreateInfos[c].pColorBlendState->logicOp;
pip->attachmentCount = pCreateInfos[c].pColorBlendState->attachmentCount;
pip->attachmentBlendStates = ALLOCATE(sizeof(VkPipelineColorBlendAttachmentState) * pip->attachmentCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!pip->attachmentBlendStates)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(pip->attachmentBlendStates, pCreateInfos->pColorBlendState->pAttachments, sizeof(VkPipelineColorBlendAttachmentState) * pip->attachmentCount);
memcpy(pip->attachmentBlendStates, pCreateInfos[c].pColorBlendState->pAttachments, sizeof(VkPipelineColorBlendAttachmentState) * pip->attachmentCount);
memcpy(pip->blendConstants, pCreateInfos->pColorBlendState, sizeof(float)*4);
memcpy(pip->blendConstants, pCreateInfos[c].pColorBlendState, sizeof(float)*4);
if(pCreateInfos->pDynamicState)
if(pCreateInfos[c].pDynamicState)
{
pip->dynamicStateCount = pCreateInfos->pDynamicState->dynamicStateCount;
pip->dynamicStateCount = pCreateInfos[c].pDynamicState->dynamicStateCount;
pip->dynamicStates = ALLOCATE(sizeof(VkDynamicState)*pip->dynamicStateCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!pip->dynamicStates)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(pip->dynamicStates, pCreateInfos->pDynamicState->pDynamicStates, sizeof(VkDynamicState)*pip->dynamicStateCount);
memcpy(pip->dynamicStates, pCreateInfos[c].pDynamicState->pDynamicStates, sizeof(VkDynamicState)*pip->dynamicStateCount);
}
else
{
@ -170,9 +254,9 @@ VkResult vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCach
pip->dynamicStates = 0;
}
pip->layout = pCreateInfos->layout;
pip->renderPass = pCreateInfos->renderPass;
pip->subpass = pCreateInfos->subpass;
pip->layout = pCreateInfos[c].layout;
pip->renderPass = pCreateInfos[c].renderPass;
pip->subpass = pCreateInfos[c].subpass;
//TODO derivative pipelines ignored

View File

@ -33,8 +33,8 @@ VkResult vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device, VkRpiShaderModu
uint32_t size = sizeof(uint64_t)*numInstructions;
//TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
//why not map kernel space mem to user space instead?
uint64_t* instructions = ALLOCATE(size, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!instructions)
shader->instructions[c] = ALLOCATE(size, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if(!shader->instructions[c])
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
@ -44,22 +44,13 @@ VkResult vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device, VkRpiShaderModu
char* tmpShaderStr = ALLOCATE(stringLength+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
memcpy(tmpShaderStr, pCreateInfo->asmStrings[c], stringLength+1);
assemble_qpu_asm(tmpShaderStr, instructions);
assemble_qpu_asm(tmpShaderStr, shader->instructions[c]);
FREE(tmpShaderStr);
shader->bos[c] = vc4_bo_alloc_shader(controlFd, instructions, &size);
//TODO if debug...
for(uint64_t d = 0; d < numInstructions; ++d)
{
printf("%#llx ", instructions[d]);
disassemble_qpu_asm(instructions[d]);
}
for(uint64_t d = 0; d < numInstructions; ++d)
{
uint64_t s = (instructions[d] & (0xfll << 60)) >> 60;
uint64_t s = (shader->instructions[c][d] & (0xfll << 60)) >> 60;
if(s == 2ll)
{
shader->hasThreadSwitch = 1;
@ -70,14 +61,14 @@ VkResult vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device, VkRpiShaderModu
shader->numVaryings = 0;
for(uint64_t d = 0; d < numInstructions; ++d)
{
unsigned is_sem = ((instructions[d] & (0x7fll << 57)) >> 57) == 0x74;
unsigned sig_bits = ((instructions[d] & (0xfll << 60)) >> 60);
unsigned is_sem = ((shader->instructions[c][d] & (0x7fll << 57)) >> 57) == 0x74;
unsigned sig_bits = ((shader->instructions[c][d] & (0xfll << 60)) >> 60);
//if it's an ALU instruction
if(!is_sem && sig_bits != 14 && sig_bits != 15)
{
unsigned raddr_a = ((instructions[d] & (0x3fll << 18)) >> 18);
unsigned raddr_b = ((instructions[d] & (0x3fll << 12)) >> 12);
unsigned raddr_a = ((shader->instructions[c][d] & (0x3fll << 18)) >> 18);
unsigned raddr_b = ((shader->instructions[c][d] & (0x3fll << 12)) >> 12);
if(raddr_a == 35)
{
@ -92,10 +83,6 @@ VkResult vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device, VkRpiShaderModu
}
}
printf("\n");
FREE(instructions);
shader->sizes[c] = size;
}
else

View File

@ -949,9 +949,6 @@ void CreateShaders()
/**/
//display a color
char fs_asm_code[] =
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; tlb_z = or.always(b, b, nop, rb15) ; nop = nop(r0, r0) ;"
/// instead of outputting the final color
/// we patch the shader (eventually in the driver)
/// so that it performs the desired blending mode

View File

@ -1087,9 +1087,6 @@ void CreateShaders()
/**/
//display a color
char fs_asm_code[] =
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; tlb_z = or.always(b, b, nop, rb15) ; nop = nop(r0, r0) ;"
"sig_none ; tlb_color_all = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"

View File

@ -1087,22 +1087,6 @@ void CreateShaders()
/**/
//display a color
char fs_asm_code[] =
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
///stencil setup state
/// 28 24 20 16 12 8 4 0
///1111 0100 1001 0111 1110 1110 1111 1111
/// -> 0xF497EEFF
///selection = front and back (0x3)
///write mask = 0xff
///z test fail op = replace (0x2)
///z test pass op = replace (0x2)
///stencil test fail op = replace (0x2)
///stencil function = always (0x7)
///stencil ref value = 0xee
///stencil function mask = 0xff
"sig_load_imm ; r0 = load32.always(0xF497EEFF) ; nop = load32() ;" //stencil setup state
"sig_none ; tlb_stencil_setup = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; tlb_z = or.always(b, b, nop, rb15) ; nop = nop(r0, r0) ;"
///omit color write
"sig_none ; r0 = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
@ -1114,22 +1098,6 @@ void CreateShaders()
/**/
//display a color
char fs_asm_code2[] =
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
///stencil setup state
/// 28 24 20 16 12 8 4 0
///1111 0010 0100 1101 1110 1110 1111 1111
/// -> 0xF24DEEFF
///selection = front and back (0x3)
///write mask = 0xff
///z test fail op = keep (0x1)
///z test pass op = keep (0x1)
///stencil test fail op = keep (0x1)
///stencil function = not equal (0x5)
///stencil ref value = 0xee
///stencil function mask = 0xff
"sig_load_imm ; r0 = load32.always(0xF24DEEFF) ; nop = load32() ;" //stencil setup state
"sig_none ; tlb_stencil_setup = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; tlb_z = or.always(b, b, nop, rb15) ; nop = nop(r0, r0) ;"
"sig_none ; tlb_color_all = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
@ -1336,9 +1304,9 @@ void CreatePipeline()
depthStencilState.front.compareOp = VK_COMPARE_OP_NOT_EQUAL;
depthStencilState.front.depthFailOp = VK_STENCIL_OP_REPLACE;
depthStencilState.front.failOp = VK_STENCIL_OP_REPLACE;
depthStencilState.front.passOp = VK_STENCIL_OP_REPLACE;
depthStencilState.front.depthFailOp = VK_STENCIL_OP_KEEP;
depthStencilState.front.failOp = VK_STENCIL_OP_KEEP;
depthStencilState.front.passOp = VK_STENCIL_OP_KEEP;
depthStencilState.back = depthStencilState.front;
blendAttachState.colorWriteMask = 0xf;