mirror of
https://github.com/Yours3lf/rpi-vk-driver.git
synced 2025-02-19 16:54:18 +01:00
implemented depth/stencil shader patching
This commit is contained in:
parent
e529b69adc
commit
ab337a9212
@ -1,11 +1,96 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include "vc4_qpu_defines.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//Builds one 64-bit QPU ALU instruction word from the individual fields passed in
//and returns the encoded instruction.
//The per-parameter notes below describe how the fields interact.
//NOTE(review): valid ranges of the uint8_t flag/mode arguments are not visible in this header -- check the implementation.
uint64_t encode_alu(qpu_sig_bits sig_bits,
|
||||
qpu_unpack unpack_mode,
|
||||
//If the pm bit is set, the unpack field programs the r4 unpack unit,
|
||||
//and the pack field is used to program the color
|
||||
//conversion on the output of the mul unit
|
||||
uint8_t pack_unpack_select,
|
||||
uint8_t pack_mode,
|
||||
qpu_cond add_cond,
|
||||
qpu_cond mul_cond,
|
||||
uint8_t set_flags, //Flags are updated from the add ALU unless the add ALU performed a NOP (or its condition code was NEVER) in which case flags are updated from the mul ALU
|
||||
uint8_t write_swap_flag, //0: add writes to A, mul to B, 1: add writes to B, mul to A
|
||||
qpu_waddr waddr_add,
|
||||
qpu_waddr waddr_mul,
|
||||
qpu_op_mul op_mul,
|
||||
qpu_op_add op_add,
|
||||
qpu_raddr raddr_a,
|
||||
qpu_raddr raddr_b,
|
||||
qpu_mux add_a,
|
||||
qpu_mux add_b,
|
||||
qpu_mux mul_a,
|
||||
qpu_mux mul_b
|
||||
);
|
||||
//Variant of the ALU encoder that takes a small immediate (small_imm) in the
//position where encode_alu takes raddr_b, and has no sig_bits parameter.
//Returns the encoded 64-bit instruction word.
//NOTE(review): presumably the signal field is fixed by the implementation for this form -- confirm.
uint64_t encode_alu_small_imm(qpu_unpack unpack_mode,
|
||||
uint8_t pack_unpack_select,
|
||||
uint8_t pack_mode,
|
||||
qpu_cond add_cond,
|
||||
qpu_cond mul_cond,
|
||||
uint8_t set_flags, //Flags are updated from the add ALU unless the add ALU performed a NOP (or its condition code was NEVER) in which case flags are updated from the mul ALU
|
||||
uint8_t write_swap_flag, //0: add writes to A, mul to B, 1: add writes to B, mul to A
|
||||
qpu_waddr waddr_add,
|
||||
qpu_waddr waddr_mul,
|
||||
qpu_op_mul op_mul,
|
||||
qpu_op_add op_add,
|
||||
qpu_raddr raddr_a,
|
||||
uint8_t small_imm,
|
||||
qpu_mux add_a,
|
||||
qpu_mux add_b,
|
||||
qpu_mux mul_a,
|
||||
qpu_mux mul_b
|
||||
);
|
||||
//Builds one 64-bit QPU branch instruction word and returns it.
//Per the parameter notes below, the branch target is made up of imm (always added),
//PC+4 when is_relative is set, and the value of raddr_a when use_raddr_a is set.
uint64_t encode_branch(qpu_branch_cond branch_cond,
|
||||
uint8_t is_relative, //if set branch target is relative to PC+4
|
||||
uint8_t use_raddr_a, //if set add value of raddr_a (from simd elem 0) to branch target
|
||||
qpu_raddr raddr_a,
|
||||
uint8_t write_swap_bit,
|
||||
qpu_waddr waddr_add,
|
||||
qpu_waddr waddr_mul,
|
||||
uint32_t imm //always added to branch target, set to 0 if unused
|
||||
);
|
||||
//Builds one 64-bit QPU semaphore instruction word and returns it.
//sem selects the semaphore, incr_sem selects increment when set to 1, and imm_val is
//loaded into the SIMD elements as described in the parameter notes below.
//NOTE(review): the behaviour for incr_sem == 0 is not stated here (presumably decrement) -- confirm.
uint64_t encode_semaphore(uint8_t pack_unpack_select,
|
||||
uint8_t pack_mode,
|
||||
qpu_cond cond_add,
|
||||
qpu_cond cond_mul,
|
||||
uint8_t set_flags,
|
||||
uint8_t write_swap,
|
||||
qpu_waddr waddr_add,
|
||||
qpu_waddr waddr_mul,
|
||||
uint8_t incr_sem, //if 1 increment semaphore
|
||||
uint8_t sem, //4 bit semaphore selector
|
||||
uint32_t imm_val //27bit immediate value loaded into all 16 simd elements
|
||||
);
|
||||
//Builds one 64-bit QPU load-immediate instruction word and returns it.
//imm is the value to load; waddr_add / waddr_mul are the write addresses of the
//add and mul units, each gated by its own condition (cond_add / cond_mul).
uint64_t encode_load_imm(uint8_t pack_unpack_select,
|
||||
uint8_t pack_mode,
|
||||
qpu_cond cond_add,
|
||||
qpu_cond cond_mul,
|
||||
uint8_t set_flags,
|
||||
uint8_t write_swap,
|
||||
qpu_waddr waddr_add,
|
||||
qpu_waddr waddr_mul,
|
||||
uint32_t imm //2x16bit or 1x32bit uint
|
||||
);
|
||||
//Builds one 64-bit QPU per-element load-immediate instruction word and returns it.
//ms_bit and ls_bit carry per-element bits as described in the parameter notes below;
//signed_or_unsigned selects how they are interpreted.
//NOTE(review): presumably bit n of ms_bit/ls_bit belongs to SIMD element n -- confirm in the implementation.
uint64_t encode_load_imm_per_elem(
|
||||
uint8_t signed_or_unsigned, //1 for signed, 0 for unsigned
|
||||
uint8_t pack_unpack_select,
|
||||
uint8_t pack_mode,
|
||||
qpu_cond cond_add,
|
||||
qpu_cond cond_mul,
|
||||
uint8_t set_flags,
|
||||
uint8_t write_swap,
|
||||
qpu_waddr waddr_add,
|
||||
qpu_waddr waddr_mul,
|
||||
uint16_t ms_bit, //per element MS (sign) bit
|
||||
uint16_t ls_bit //per element LS bit
|
||||
);
|
||||
//Disassembles one encoded 64-bit QPU instruction.
//NOTE(review): nothing is returned, so the text form is presumably printed (stdout?) -- confirm in the implementation.
void disassemble_qpu_asm(uint64_t instruction);
|
||||
//Assembles the QPU assembly text in str into the caller-provided instructions array.
//NOTE(review): str is non-const, so the text may be modified while parsing -- confirm, and pass a scratch copy if unsure.
//NOTE(review): the required capacity of instructions is presumably get_num_instructions(str) entries -- confirm.
void assemble_qpu_asm(char* str, uint64_t* instructions);
|
||||
//NOTE(review): judging by the name, returns how many instructions the assembly text at ptr contains -- confirm in the implementation.
unsigned get_num_instructions(char* ptr);
|
||||
|
@ -139,7 +139,7 @@ static const char *qpu_sig_bits_str[] = {
|
||||
//Returns the small immediate value to be encoded in to the raddr b field if
|
||||
//the argument can be represented as one, or ~0 otherwise.
|
||||
//48: Small immediate value for rotate-by-r5, and 49-63 are "rotate by n channels"
|
||||
uint8_t qpu_encode_small_immediate(uint32_t i)
|
||||
static uint8_t qpu_encode_small_immediate(uint32_t i)
|
||||
{
|
||||
if (i <= 15)
|
||||
return i;
|
||||
|
@ -657,11 +657,16 @@ void encodeTextureUniform(uint32_t* params, //array of 4 uint32_t
|
||||
params[3] = 0;
|
||||
}
|
||||
|
||||
void encodeDepthStencilValue(uint32_t *values, uint32_t* numValues, VkStencilOpState front, VkStencilOpState back)
|
||||
void encodeStencilValue(uint32_t *values, uint32_t* numValues, VkStencilOpState front, VkStencilOpState back, uint8_t stencilTestEnable)
|
||||
{
|
||||
assert(values);
|
||||
assert(numValues);
|
||||
|
||||
if(!stencilTestEnable)
|
||||
{
|
||||
front.compareOp = back.compareOp = VK_COMPARE_OP_ALWAYS;
|
||||
}
|
||||
|
||||
if(front.compareMask == back.compareMask &&
|
||||
front.compareOp == back.compareOp &&
|
||||
front.depthFailOp == back.depthFailOp &&
|
||||
@ -676,10 +681,10 @@ void encodeDepthStencilValue(uint32_t *values, uint32_t* numValues, VkStencilOpS
|
||||
values[0] = 0
|
||||
| (front.compareMask & 0xff)
|
||||
| (front.reference & 0xff) << 0x8
|
||||
| (getCompareOp(front.compareOp) & 0x3) << 16
|
||||
| (getStencilOp(front.failOp) & 0x3) << 19
|
||||
| (getStencilOp(front.passOp) & 0x3) << 22
|
||||
| (getStencilOp(front.depthFailOp) & 0x3) << 25
|
||||
| (getCompareOp(front.compareOp) & 0x7) << 16
|
||||
| (getStencilOp(front.failOp) & 0x7) << 19
|
||||
| (getStencilOp(front.passOp) & 0x7) << 22
|
||||
| (getStencilOp(front.depthFailOp) & 0x7) << 25
|
||||
| 3 << 30; //front and back
|
||||
|
||||
switch(front.writeMask)
|
||||
@ -711,19 +716,19 @@ void encodeDepthStencilValue(uint32_t *values, uint32_t* numValues, VkStencilOpS
|
||||
values[0] = 0
|
||||
| (front.compareMask & 0xff)
|
||||
| (front.reference & 0xff) << 0x8
|
||||
| (getCompareOp(front.compareOp) & 0x3) << 16
|
||||
| (getStencilOp(front.failOp) & 0x3) << 19
|
||||
| (getStencilOp(front.passOp) & 0x3) << 22
|
||||
| (getStencilOp(front.depthFailOp) & 0x3) << 25
|
||||
| (getCompareOp(front.compareOp) & 0x7) << 16
|
||||
| (getStencilOp(front.failOp) & 0x7) << 19
|
||||
| (getStencilOp(front.passOp) & 0x7) << 22
|
||||
| (getStencilOp(front.depthFailOp) & 0x7) << 25
|
||||
| 1 << 30; //front
|
||||
|
||||
values[1] = 0
|
||||
| (back.compareMask & 0xff)
|
||||
| (back.reference & 0xff) << 0x8
|
||||
| (getCompareOp(back.compareOp) & 0x3) << 16
|
||||
| (getStencilOp(back.failOp) & 0x3) << 19
|
||||
| (getStencilOp(back.passOp) & 0x3) << 22
|
||||
| (getStencilOp(back.depthFailOp) & 0x3) << 25
|
||||
| (getCompareOp(back.compareOp) & 0x7) << 16
|
||||
| (getStencilOp(back.failOp) & 0x7) << 19
|
||||
| (getStencilOp(back.passOp) & 0x7) << 22
|
||||
| (getStencilOp(back.depthFailOp) & 0x7) << 25
|
||||
| 2 << 30; //front
|
||||
|
||||
if((front.writeMask == 0x1 ||
|
||||
|
@ -217,6 +217,7 @@ typedef struct VkShaderModule_T
|
||||
{
|
||||
uint32_t bos[RPI_ASSEMBLY_TYPE_MAX];
|
||||
uint32_t sizes[RPI_ASSEMBLY_TYPE_MAX];
|
||||
uint64_t* instructions[RPI_ASSEMBLY_TYPE_MAX];
|
||||
VkRpiAssemblyMappingEXT* mappings;
|
||||
uint32_t numMappings;
|
||||
uint32_t hasThreadSwitch;
|
||||
@ -480,7 +481,7 @@ void encodeTextureUniform(uint32_t* params,
|
||||
uint8_t wrapT,
|
||||
uint8_t wrapS,
|
||||
uint8_t noAutoLod);
|
||||
void encodeDepthStencilValue(uint32_t* values, uint32_t* numValues, VkStencilOpState front, VkStencilOpState back);
|
||||
void encodeStencilValue(uint32_t* values, uint32_t* numValues, VkStencilOpState front, VkStencilOpState back, uint8_t stencilTestEnable);
|
||||
uint8_t getTextureDataType(VkFormat format);
|
||||
uint8_t getMinFilterType(VkFilter minFilter, VkSamplerMipmapMode mipFilter, float maxLod);
|
||||
uint8_t getWrapMode(VkSamplerAddressMode mode);
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include "common.h"
|
||||
|
||||
#include "kernel/vc4_packet.h"
|
||||
#include "../QPUassembler/qpu_assembler.h"
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdBindPipeline
|
||||
@ -20,6 +21,46 @@ void vkCmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeli
|
||||
}
|
||||
}
|
||||
|
||||
void patchShaderDepthStencilBlending(uint64_t** instructions, uint32_t* size, const VkPipelineDepthStencilStateCreateInfo* dsi, const VkAllocationCallbacks* pAllocator)
|
||||
{
|
||||
assert(instructions);
|
||||
assert(size);
|
||||
assert(dsi);
|
||||
|
||||
uint32_t numExtraInstructions = 0;
|
||||
numExtraInstructions += dsi->depthWriteEnable || dsi->stencilTestEnable;
|
||||
|
||||
uint32_t values[3];
|
||||
uint32_t numValues;
|
||||
encodeStencilValue(values, &numValues, dsi->front, dsi->back, dsi->stencilTestEnable);
|
||||
|
||||
numExtraInstructions += numValues * 2;
|
||||
|
||||
uint32_t newSize = *size + numExtraInstructions * sizeof(uint64_t);
|
||||
uint64_t* tmp = ALLOCATE(newSize, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
memset(tmp, 0, newSize);
|
||||
memcpy(tmp + numExtraInstructions, *instructions, *size);
|
||||
|
||||
///"sig_load_imm ; r0 = load32.always(0xF497EEFF) ; nop = load32() ;" //stencil setup state
|
||||
///"sig_none ; tlb_stencil_setup = or.always(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
for(uint32_t c = 0; c < numValues; ++c)
|
||||
{
|
||||
tmp[c] = encode_load_imm(0, 0, 1, 0, 0, 0, 32 + c, 39, values[c]); //r0 = load32.always(values[c])
|
||||
tmp[numValues + c] = encode_alu(1, 0, 0, 0, 1, 0, 0, 0, 43, 39, 0, 21, 0, 0, c, c, 0, 0); //tlb_stencil_setup = or.always(r0, r0)
|
||||
}
|
||||
|
||||
///"sig_none ; tlb_z = or.always(b, b, nop, rb15) ; nop = nop(r0, r0) ;"
|
||||
if(dsi->depthWriteEnable || dsi->stencilTestEnable)
|
||||
{
|
||||
tmp[numValues*2] = encode_alu(1, 0, 0, 0, 1, 0, 0, 0, 44, 39, 0, 21, 0, 15, 7, 7, 0, 0);
|
||||
}
|
||||
|
||||
//replace instructions pointer
|
||||
FREE(*instructions);
|
||||
*instructions = tmp;
|
||||
*size = newSize;
|
||||
}
|
||||
|
||||
/*
|
||||
* https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateGraphicsPipelines
|
||||
*/
|
||||
@ -47,122 +88,165 @@ VkResult vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCach
|
||||
}
|
||||
|
||||
memset(pip->names, 0, sizeof(char*)*6);
|
||||
memset(pip->modules, 0, sizeof(_shaderModule*)*6);
|
||||
|
||||
for(int d = 0; d < pCreateInfos->stageCount; ++d)
|
||||
for(int d = 0; d < pCreateInfos[c].stageCount; ++d)
|
||||
{
|
||||
uint32_t idx = ulog2(pCreateInfos->pStages[d].stage);
|
||||
pip->modules[idx] = pCreateInfos->pStages[d].module;
|
||||
uint32_t idx = ulog2(pCreateInfos[c].pStages[d].stage);
|
||||
pip->modules[idx] = pCreateInfos[c].pStages[d].module;
|
||||
|
||||
pip->names[idx] = ALLOCATE(strlen(pCreateInfos->pStages[d].pName)+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
_shaderModule* s = pip->modules[idx];
|
||||
|
||||
pip->names[idx] = ALLOCATE(strlen(pCreateInfos[c].pStages[d].pName)+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if(!pip->names[idx])
|
||||
{
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
memcpy(pip->names[idx], pCreateInfos->pStages[d].pName, strlen(pCreateInfos->pStages[d].pName)+1);
|
||||
memcpy(pip->names[idx], pCreateInfos[c].pStages[d].pName, strlen(pCreateInfos[c].pStages[d].pName)+1);
|
||||
|
||||
//patch fragment shader
|
||||
if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_FRAGMENT_BIT)
|
||||
{
|
||||
patchShaderDepthStencilBlending(&s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT], pCreateInfos[c].pDepthStencilState, pAllocator);
|
||||
|
||||
//TODO if debug...
|
||||
for(uint64_t e = 0; e < s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT] / 8; ++e)
|
||||
{
|
||||
printf("%#llx ", s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT][e]);
|
||||
disassemble_qpu_asm(s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT][e]);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
s->bos[RPI_ASSEMBLY_TYPE_FRAGMENT] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_FRAGMENT], &s->sizes[RPI_ASSEMBLY_TYPE_FRAGMENT]);
|
||||
}
|
||||
|
||||
if(pCreateInfos[c].pStages[d].stage & VK_SHADER_STAGE_VERTEX_BIT)
|
||||
{
|
||||
//TODO if debug...
|
||||
for(uint64_t e = 0; e < s->sizes[RPI_ASSEMBLY_TYPE_VERTEX] / 8; ++e)
|
||||
{
|
||||
printf("%#llx ", s->instructions[RPI_ASSEMBLY_TYPE_VERTEX][e]);
|
||||
disassemble_qpu_asm(s->instructions[RPI_ASSEMBLY_TYPE_VERTEX][e]);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
for(uint64_t e = 0; e < s->sizes[RPI_ASSEMBLY_TYPE_COORDINATE] / 8; ++e)
|
||||
{
|
||||
printf("%#llx ", s->instructions[RPI_ASSEMBLY_TYPE_COORDINATE][e]);
|
||||
disassemble_qpu_asm(s->instructions[RPI_ASSEMBLY_TYPE_COORDINATE][e]);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
s->bos[RPI_ASSEMBLY_TYPE_COORDINATE] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_COORDINATE], &s->sizes[RPI_ASSEMBLY_TYPE_COORDINATE]);
|
||||
s->bos[RPI_ASSEMBLY_TYPE_VERTEX] = vc4_bo_alloc_shader(controlFd, s->instructions[RPI_ASSEMBLY_TYPE_VERTEX], &s->sizes[RPI_ASSEMBLY_TYPE_VERTEX]);
|
||||
}
|
||||
}
|
||||
|
||||
pip->vertexAttributeDescriptionCount = pCreateInfos->pVertexInputState->vertexAttributeDescriptionCount;
|
||||
pip->vertexAttributeDescriptionCount = pCreateInfos[c].pVertexInputState->vertexAttributeDescriptionCount;
|
||||
pip->vertexAttributeDescriptions = ALLOCATE(sizeof(VkVertexInputAttributeDescription) * pip->vertexAttributeDescriptionCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if(!pip->vertexAttributeDescriptions)
|
||||
{
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
memcpy(pip->vertexAttributeDescriptions, pCreateInfos->pVertexInputState->pVertexAttributeDescriptions, sizeof(VkVertexInputAttributeDescription) * pip->vertexAttributeDescriptionCount);
|
||||
memcpy(pip->vertexAttributeDescriptions, pCreateInfos[c].pVertexInputState->pVertexAttributeDescriptions, sizeof(VkVertexInputAttributeDescription) * pip->vertexAttributeDescriptionCount);
|
||||
|
||||
pip->vertexBindingDescriptionCount = pCreateInfos->pVertexInputState->vertexBindingDescriptionCount;
|
||||
pip->vertexBindingDescriptionCount = pCreateInfos[c].pVertexInputState->vertexBindingDescriptionCount;
|
||||
pip->vertexBindingDescriptions = ALLOCATE(sizeof(VkVertexInputBindingDescription) * pip->vertexBindingDescriptionCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if(!pip->vertexBindingDescriptions)
|
||||
{
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
memcpy(pip->vertexBindingDescriptions, pCreateInfos->pVertexInputState->pVertexBindingDescriptions, sizeof(VkVertexInputBindingDescription) * pip->vertexBindingDescriptionCount);
|
||||
memcpy(pip->vertexBindingDescriptions, pCreateInfos[c].pVertexInputState->pVertexBindingDescriptions, sizeof(VkVertexInputBindingDescription) * pip->vertexBindingDescriptionCount);
|
||||
|
||||
pip->topology = pCreateInfos->pInputAssemblyState->topology;
|
||||
pip->primitiveRestartEnable = pCreateInfos->pInputAssemblyState->primitiveRestartEnable;
|
||||
pip->topology = pCreateInfos[c].pInputAssemblyState->topology;
|
||||
pip->primitiveRestartEnable = pCreateInfos[c].pInputAssemblyState->primitiveRestartEnable;
|
||||
|
||||
//tessellation ignored
|
||||
|
||||
pip->viewportCount = pCreateInfos->pViewportState->viewportCount;
|
||||
pip->viewportCount = pCreateInfos[c].pViewportState->viewportCount;
|
||||
pip->viewports = ALLOCATE(sizeof(VkViewport) * pip->viewportCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if(!pip->viewports)
|
||||
{
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
memcpy(pip->viewports, pCreateInfos->pViewportState->pViewports, sizeof(VkViewport) * pip->viewportCount);
|
||||
memcpy(pip->viewports, pCreateInfos[c].pViewportState->pViewports, sizeof(VkViewport) * pip->viewportCount);
|
||||
|
||||
|
||||
pip->scissorCount = pCreateInfos->pViewportState->scissorCount;
|
||||
pip->scissorCount = pCreateInfos[c].pViewportState->scissorCount;
|
||||
pip->scissors = ALLOCATE(sizeof(VkRect2D) * pip->viewportCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if(!pip->scissors)
|
||||
{
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
memcpy(pip->scissors, pCreateInfos->pViewportState->pScissors, sizeof(VkRect2D) * pip->scissorCount);
|
||||
memcpy(pip->scissors, pCreateInfos[c].pViewportState->pScissors, sizeof(VkRect2D) * pip->scissorCount);
|
||||
|
||||
pip->depthClampEnable = pCreateInfos->pRasterizationState->depthClampEnable;
|
||||
pip->rasterizerDiscardEnable = pCreateInfos->pRasterizationState->rasterizerDiscardEnable;
|
||||
pip->polygonMode = pCreateInfos->pRasterizationState->polygonMode;
|
||||
pip->cullMode = pCreateInfos->pRasterizationState->cullMode;
|
||||
pip->frontFace = pCreateInfos->pRasterizationState->frontFace;
|
||||
pip->depthBiasEnable = pCreateInfos->pRasterizationState->depthBiasEnable;
|
||||
pip->depthBiasConstantFactor = pCreateInfos->pRasterizationState->depthBiasConstantFactor;
|
||||
pip->depthBiasClamp = pCreateInfos->pRasterizationState->depthBiasClamp;
|
||||
pip->depthBiasSlopeFactor = pCreateInfos->pRasterizationState->depthBiasSlopeFactor;
|
||||
pip->lineWidth = pCreateInfos->pRasterizationState->lineWidth;
|
||||
pip->depthClampEnable = pCreateInfos[c].pRasterizationState->depthClampEnable;
|
||||
pip->rasterizerDiscardEnable = pCreateInfos[c].pRasterizationState->rasterizerDiscardEnable;
|
||||
pip->polygonMode = pCreateInfos[c].pRasterizationState->polygonMode;
|
||||
pip->cullMode = pCreateInfos[c].pRasterizationState->cullMode;
|
||||
pip->frontFace = pCreateInfos[c].pRasterizationState->frontFace;
|
||||
pip->depthBiasEnable = pCreateInfos[c].pRasterizationState->depthBiasEnable;
|
||||
pip->depthBiasConstantFactor = pCreateInfos[c].pRasterizationState->depthBiasConstantFactor;
|
||||
pip->depthBiasClamp = pCreateInfos[c].pRasterizationState->depthBiasClamp;
|
||||
pip->depthBiasSlopeFactor = pCreateInfos[c].pRasterizationState->depthBiasSlopeFactor;
|
||||
pip->lineWidth = pCreateInfos[c].pRasterizationState->lineWidth;
|
||||
|
||||
pip->rasterizationSamples = pCreateInfos->pMultisampleState->rasterizationSamples;
|
||||
pip->sampleShadingEnable = pCreateInfos->pMultisampleState->sampleShadingEnable;
|
||||
pip->minSampleShading = pCreateInfos->pMultisampleState->minSampleShading;
|
||||
if(pCreateInfos->pMultisampleState->pSampleMask)
|
||||
pip->rasterizationSamples = pCreateInfos[c].pMultisampleState->rasterizationSamples;
|
||||
pip->sampleShadingEnable = pCreateInfos[c].pMultisampleState->sampleShadingEnable;
|
||||
pip->minSampleShading = pCreateInfos[c].pMultisampleState->minSampleShading;
|
||||
if(pCreateInfos[c].pMultisampleState->pSampleMask)
|
||||
{
|
||||
pip->sampleMask = *pCreateInfos->pMultisampleState->pSampleMask;
|
||||
pip->sampleMask = *pCreateInfos[c].pMultisampleState->pSampleMask;
|
||||
}
|
||||
else
|
||||
{
|
||||
pip->sampleMask = 0;
|
||||
}
|
||||
pip->alphaToCoverageEnable = pCreateInfos->pMultisampleState->alphaToCoverageEnable;
|
||||
pip->alphaToOneEnable = pCreateInfos->pMultisampleState->alphaToOneEnable;
|
||||
pip->alphaToCoverageEnable = pCreateInfos[c].pMultisampleState->alphaToCoverageEnable;
|
||||
pip->alphaToOneEnable = pCreateInfos[c].pMultisampleState->alphaToOneEnable;
|
||||
|
||||
pip->depthTestEnable = pCreateInfos->pDepthStencilState->depthTestEnable;
|
||||
pip->depthWriteEnable = pCreateInfos->pDepthStencilState->depthWriteEnable;
|
||||
pip->depthCompareOp = pCreateInfos->pDepthStencilState->depthCompareOp;
|
||||
pip->depthBoundsTestEnable = pCreateInfos->pDepthStencilState->depthBoundsTestEnable;
|
||||
pip->stencilTestEnable = pCreateInfos->pDepthStencilState->stencilTestEnable;
|
||||
pip->front = pCreateInfos->pDepthStencilState->front;
|
||||
pip->back = pCreateInfos->pDepthStencilState->back;
|
||||
pip->minDepthBounds = pCreateInfos->pDepthStencilState->minDepthBounds;
|
||||
pip->maxDepthBounds = pCreateInfos->pDepthStencilState->maxDepthBounds;
|
||||
pip->depthTestEnable = pCreateInfos[c].pDepthStencilState->depthTestEnable;
|
||||
pip->depthWriteEnable = pCreateInfos[c].pDepthStencilState->depthWriteEnable;
|
||||
pip->depthCompareOp = pCreateInfos[c].pDepthStencilState->depthCompareOp;
|
||||
pip->depthBoundsTestEnable = pCreateInfos[c].pDepthStencilState->depthBoundsTestEnable;
|
||||
pip->stencilTestEnable = pCreateInfos[c].pDepthStencilState->stencilTestEnable;
|
||||
pip->front = pCreateInfos[c].pDepthStencilState->front;
|
||||
pip->back = pCreateInfos[c].pDepthStencilState->back;
|
||||
pip->minDepthBounds = pCreateInfos[c].pDepthStencilState->minDepthBounds;
|
||||
pip->maxDepthBounds = pCreateInfos[c].pDepthStencilState->maxDepthBounds;
|
||||
|
||||
pip->logicOpEnable = pCreateInfos->pColorBlendState->logicOpEnable;
|
||||
pip->logicOp = pCreateInfos->pColorBlendState->logicOp;
|
||||
pip->attachmentCount = pCreateInfos->pColorBlendState->attachmentCount;
|
||||
pip->logicOpEnable = pCreateInfos[c].pColorBlendState->logicOpEnable;
|
||||
pip->logicOp = pCreateInfos[c].pColorBlendState->logicOp;
|
||||
pip->attachmentCount = pCreateInfos[c].pColorBlendState->attachmentCount;
|
||||
pip->attachmentBlendStates = ALLOCATE(sizeof(VkPipelineColorBlendAttachmentState) * pip->attachmentCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if(!pip->attachmentBlendStates)
|
||||
{
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
memcpy(pip->attachmentBlendStates, pCreateInfos->pColorBlendState->pAttachments, sizeof(VkPipelineColorBlendAttachmentState) * pip->attachmentCount);
|
||||
memcpy(pip->attachmentBlendStates, pCreateInfos[c].pColorBlendState->pAttachments, sizeof(VkPipelineColorBlendAttachmentState) * pip->attachmentCount);
|
||||
|
||||
memcpy(pip->blendConstants, pCreateInfos->pColorBlendState, sizeof(float)*4);
|
||||
memcpy(pip->blendConstants, pCreateInfos[c].pColorBlendState, sizeof(float)*4);
|
||||
|
||||
|
||||
if(pCreateInfos->pDynamicState)
|
||||
if(pCreateInfos[c].pDynamicState)
|
||||
{
|
||||
pip->dynamicStateCount = pCreateInfos->pDynamicState->dynamicStateCount;
|
||||
pip->dynamicStateCount = pCreateInfos[c].pDynamicState->dynamicStateCount;
|
||||
pip->dynamicStates = ALLOCATE(sizeof(VkDynamicState)*pip->dynamicStateCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if(!pip->dynamicStates)
|
||||
{
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
memcpy(pip->dynamicStates, pCreateInfos->pDynamicState->pDynamicStates, sizeof(VkDynamicState)*pip->dynamicStateCount);
|
||||
memcpy(pip->dynamicStates, pCreateInfos[c].pDynamicState->pDynamicStates, sizeof(VkDynamicState)*pip->dynamicStateCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -170,9 +254,9 @@ VkResult vkCreateGraphicsPipelines(VkDevice device, VkPipelineCache pipelineCach
|
||||
pip->dynamicStates = 0;
|
||||
}
|
||||
|
||||
pip->layout = pCreateInfos->layout;
|
||||
pip->renderPass = pCreateInfos->renderPass;
|
||||
pip->subpass = pCreateInfos->subpass;
|
||||
pip->layout = pCreateInfos[c].layout;
|
||||
pip->renderPass = pCreateInfos[c].renderPass;
|
||||
pip->subpass = pCreateInfos[c].subpass;
|
||||
|
||||
//TODO derivative pipelines ignored
|
||||
|
||||
|
@ -33,8 +33,8 @@ VkResult vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device, VkRpiShaderModu
|
||||
uint32_t size = sizeof(uint64_t)*numInstructions;
|
||||
//TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
|
||||
//why not map kernel space mem to user space instead?
|
||||
uint64_t* instructions = ALLOCATE(size, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if(!instructions)
|
||||
shader->instructions[c] = ALLOCATE(size, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if(!shader->instructions[c])
|
||||
{
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
@ -44,22 +44,13 @@ VkResult vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device, VkRpiShaderModu
|
||||
char* tmpShaderStr = ALLOCATE(stringLength+1, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
memcpy(tmpShaderStr, pCreateInfo->asmStrings[c], stringLength+1);
|
||||
|
||||
assemble_qpu_asm(tmpShaderStr, instructions);
|
||||
assemble_qpu_asm(tmpShaderStr, shader->instructions[c]);
|
||||
|
||||
FREE(tmpShaderStr);
|
||||
|
||||
shader->bos[c] = vc4_bo_alloc_shader(controlFd, instructions, &size);
|
||||
|
||||
//TODO if debug...
|
||||
for(uint64_t d = 0; d < numInstructions; ++d)
|
||||
{
|
||||
printf("%#llx ", instructions[d]);
|
||||
disassemble_qpu_asm(instructions[d]);
|
||||
}
|
||||
|
||||
for(uint64_t d = 0; d < numInstructions; ++d)
|
||||
{
|
||||
uint64_t s = (instructions[d] & (0xfll << 60)) >> 60;
|
||||
uint64_t s = (shader->instructions[c][d] & (0xfll << 60)) >> 60;
|
||||
if(s == 2ll)
|
||||
{
|
||||
shader->hasThreadSwitch = 1;
|
||||
@ -70,14 +61,14 @@ VkResult vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device, VkRpiShaderModu
|
||||
shader->numVaryings = 0;
|
||||
for(uint64_t d = 0; d < numInstructions; ++d)
|
||||
{
|
||||
unsigned is_sem = ((instructions[d] & (0x7fll << 57)) >> 57) == 0x74;
|
||||
unsigned sig_bits = ((instructions[d] & (0xfll << 60)) >> 60);
|
||||
unsigned is_sem = ((shader->instructions[c][d] & (0x7fll << 57)) >> 57) == 0x74;
|
||||
unsigned sig_bits = ((shader->instructions[c][d] & (0xfll << 60)) >> 60);
|
||||
|
||||
//if it's an ALU instruction
|
||||
if(!is_sem && sig_bits != 14 && sig_bits != 15)
|
||||
{
|
||||
unsigned raddr_a = ((instructions[d] & (0x3fll << 18)) >> 18);
|
||||
unsigned raddr_b = ((instructions[d] & (0x3fll << 12)) >> 12);
|
||||
unsigned raddr_a = ((shader->instructions[c][d] & (0x3fll << 18)) >> 18);
|
||||
unsigned raddr_b = ((shader->instructions[c][d] & (0x3fll << 12)) >> 12);
|
||||
|
||||
if(raddr_a == 35)
|
||||
{
|
||||
@ -92,10 +83,6 @@ VkResult vkCreateShaderModuleFromRpiAssemblyEXT(VkDevice device, VkRpiShaderModu
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
FREE(instructions);
|
||||
|
||||
shader->sizes[c] = size;
|
||||
}
|
||||
else
|
||||
|
@ -949,9 +949,6 @@ void CreateShaders()
|
||||
/**/
|
||||
//display a color
|
||||
char fs_asm_code[] =
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; tlb_z = or.always(b, b, nop, rb15) ; nop = nop(r0, r0) ;"
|
||||
/// instead of outputting the final color
|
||||
/// we patch the shader (eventually in the driver)
|
||||
/// so that it performs the desired blending mode
|
||||
|
@ -1087,9 +1087,6 @@ void CreateShaders()
|
||||
/**/
|
||||
//display a color
|
||||
char fs_asm_code[] =
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; tlb_z = or.always(b, b, nop, rb15) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; tlb_color_all = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;"
|
||||
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
|
@ -1087,22 +1087,6 @@ void CreateShaders()
|
||||
/**/
|
||||
//display a color
|
||||
char fs_asm_code[] =
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
///stencil setup state
|
||||
/// 28 24 20 16 12 8 4 0
|
||||
///1111 0100 1001 0111 1110 1110 1111 1111
|
||||
/// -> 0xF497EEFF
|
||||
///selection = front and back (0x3)
|
||||
///write mask = 0xff
|
||||
///z test fail op = replace (0x2)
|
||||
///z test pass op = replace (0x2)
|
||||
///stencil test fail op = replace (0x2)
|
||||
///stencil function = always (0x7)
|
||||
///stencil ref value = 0xee
|
||||
///stencil function mask = 0xff
|
||||
"sig_load_imm ; r0 = load32.always(0xF497EEFF) ; nop = load32() ;" //stencil setup state
|
||||
"sig_none ; tlb_stencil_setup = or.always(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; tlb_z = or.always(b, b, nop, rb15) ; nop = nop(r0, r0) ;"
|
||||
///omit color write
|
||||
"sig_none ; r0 = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;"
|
||||
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
@ -1114,22 +1098,6 @@ void CreateShaders()
|
||||
/**/
|
||||
//display a color
|
||||
char fs_asm_code2[] =
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
///stencil setup state
|
||||
/// 28 24 20 16 12 8 4 0
|
||||
///1111 0010 0100 1101 1110 1110 1111 1111
|
||||
/// -> 0xF24DEEFF
|
||||
///selection = front and back (0x3)
|
||||
///write mask = 0xff
|
||||
///z test fail op = keep (0x1)
|
||||
///z test pass op = keep (0x1)
|
||||
///stencil test fail op = keep (0x1)
|
||||
///stencil function = not equal (0x5)
|
||||
///stencil ref value = 0xee
|
||||
///stencil function mask = 0xff
|
||||
"sig_load_imm ; r0 = load32.always(0xF24DEEFF) ; nop = load32() ;" //stencil setup state
|
||||
"sig_none ; tlb_stencil_setup = or.always(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; tlb_z = or.always(b, b, nop, rb15) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; tlb_color_all = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;"
|
||||
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
|
||||
@ -1336,9 +1304,9 @@ void CreatePipeline()
|
||||
|
||||
|
||||
depthStencilState.front.compareOp = VK_COMPARE_OP_NOT_EQUAL;
|
||||
depthStencilState.front.depthFailOp = VK_STENCIL_OP_REPLACE;
|
||||
depthStencilState.front.failOp = VK_STENCIL_OP_REPLACE;
|
||||
depthStencilState.front.passOp = VK_STENCIL_OP_REPLACE;
|
||||
depthStencilState.front.depthFailOp = VK_STENCIL_OP_KEEP;
|
||||
depthStencilState.front.failOp = VK_STENCIL_OP_KEEP;
|
||||
depthStencilState.front.passOp = VK_STENCIL_OP_KEEP;
|
||||
depthStencilState.back = depthStencilState.front;
|
||||
|
||||
blendAttachState.colorWriteMask = 0xf;
|
||||
|
Loading…
x
Reference in New Issue
Block a user