1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2024-12-04 16:24:15 +01:00
rpi-vk-driver/driver/copy.c

1301 lines
49 KiB
C
Raw Normal View History

2019-04-22 15:58:27 +02:00
#include "common.h"
2019-12-12 22:10:02 +01:00
#include "declarations.h"
#include "QPUassembler/qpu_assembler.h"
2019-12-12 22:10:02 +01:00
//TODO change allocations to pool allocator
//TODO
// ETC1 is arranged as 64-bit blocks, where each block
// is 4x4 pixels. Texture tiling operates on the
// 64-bit block the way it would an uncompressed
// pixels.
// Cube map faces appear as whole miptrees at a page-aligned offset
// from the first face's miptree.
2019-12-12 22:10:02 +01:00
uint32_t getMemoryTypeIndex(VkPhysicalDeviceMemoryProperties deviceMemoryProperties, uint32_t typeBits, VkMemoryPropertyFlags properties)
{
// Iterate over all memory types available for the device used in this example
for (uint32_t i = 0; i < deviceMemoryProperties.memoryTypeCount; i++)
{
if ((typeBits & 1) == 1)
{
if ((deviceMemoryProperties.memoryTypes[i].propertyFlags & properties) == properties)
{
return i;
}
}
typeBits >>= 1;
}
assert(0);
}
void createFullscreenQuad(VkDevice device, VkBuffer* fsqVertexBuffer, VkDeviceMemory* fsqVertexBufferMemory)
{
VkMemoryRequirements mr;
{ //create fsq vertex buffer
2020-03-02 22:50:08 +01:00
unsigned vboSize = sizeof(float) * 4 * 3 * 2; //4 * 3 x vec2
2019-12-12 22:10:02 +01:00
VkBufferCreateInfo ci = {};
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
ci.size = vboSize;
ci.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
VkResult res = rpi_vkCreateBuffer(device, &ci, 0, fsqVertexBuffer);
rpi_vkGetBufferMemoryRequirements(device, *fsqVertexBuffer, &mr);
VkPhysicalDeviceMemoryProperties pdmp;
rpi_vkGetPhysicalDeviceMemoryProperties(((_device*)device)->dev, &pdmp);
VkMemoryAllocateInfo mai = {};
mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
mai.allocationSize = mr.size;
mai.memoryTypeIndex = getMemoryTypeIndex(pdmp, mr.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
res = rpi_vkAllocateMemory(device, &mai, 0, fsqVertexBufferMemory);
float vertices[] =
{
-1, 1, 0, 1,
1, 1, 1, 1,
1, -1, 1, 0,
2019-12-12 22:10:02 +01:00
1, -1, 1, 0,
-1, -1, 0, 0,
-1, 1, 0, 1
2019-12-12 22:10:02 +01:00
};
void* data;
res = rpi_vkMapMemory(device, *fsqVertexBufferMemory, 0, mr.size, 0, &data);
memcpy(data, vertices, vboSize);
rpi_vkUnmapMemory(device, *fsqVertexBufferMemory);
res = rpi_vkBindBufferMemory(device, *fsqVertexBuffer, *fsqVertexBufferMemory, 0);
}
}
2020-03-02 22:50:08 +01:00
void createDescriptorPool(VkDevice device, VkDescriptorPool* descriptorPool)
2019-12-12 22:10:02 +01:00
{
2020-03-02 22:50:08 +01:00
VkDescriptorPoolSize descriptorPoolSizes[2];
descriptorPoolSizes[0].descriptorCount = 2048;
descriptorPoolSizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
descriptorPoolSizes[1].descriptorCount = 2048;
2020-03-02 22:50:08 +01:00
descriptorPoolSizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
VkDescriptorPoolCreateInfo descriptorPoolCI = {};
descriptorPoolCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
2020-03-02 22:50:08 +01:00
descriptorPoolCI.poolSizeCount = 2;
descriptorPoolCI.pPoolSizes = descriptorPoolSizes;
descriptorPoolCI.maxSets = 2048;
descriptorPoolCI.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
rpi_vkCreateDescriptorPool(device, &descriptorPoolCI, 0, descriptorPool);
2020-03-02 22:50:08 +01:00
}
void createDescriptorSetLayouts(VkDevice device, VkDescriptorSetLayout* bufferDsl, VkDescriptorSetLayout* textureDsl)
{
assert(device);
assert(bufferDsl);
assert(textureDsl);
2019-12-12 22:10:02 +01:00
//create blit dsl
VkDescriptorSetLayoutBinding setLayoutBinding = {};
setLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
setLayoutBinding.binding = 0;
setLayoutBinding.descriptorCount = 1;
setLayoutBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
VkDescriptorSetLayoutCreateInfo descriptorLayoutCI = {};
descriptorLayoutCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
descriptorLayoutCI.bindingCount = 1;
descriptorLayoutCI.pBindings = &setLayoutBinding;
2020-03-02 22:50:08 +01:00
rpi_vkCreateDescriptorSetLayout(device, &descriptorLayoutCI, 0, bufferDsl);
2019-12-12 22:10:02 +01:00
2020-03-02 22:50:08 +01:00
setLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
rpi_vkCreateDescriptorSetLayout(device, &descriptorLayoutCI, 0, textureDsl);
2019-12-12 22:10:02 +01:00
}
2020-03-02 22:50:08 +01:00
void createSampler(VkDevice device, VkSampler* nearestTextureSampler, VkSampler* linearTextureSampler)
{
VkSamplerCreateInfo sampler = {};
sampler.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
sampler.magFilter = VK_FILTER_NEAREST;
sampler.minFilter = VK_FILTER_NEAREST;
sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
sampler.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
sampler.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
sampler.mipLodBias = 0.0f;
sampler.compareOp = VK_COMPARE_OP_NEVER;
sampler.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
2020-03-02 22:50:08 +01:00
rpi_vkCreateSampler(device, &sampler, 0, nearestTextureSampler);
sampler.magFilter = VK_FILTER_LINEAR;
sampler.minFilter = VK_FILTER_LINEAR;
rpi_vkCreateSampler(device, &sampler, 0, linearTextureSampler);
}
void createRendertarget(VkDevice device, uint32_t baseLayer, uint32_t baseMip, uint32_t width, uint32_t height, VkImage textureImage, VkImageView* textureView, VkRenderPass* offscreenRenderPass, VkFramebuffer* offscreenFramebuffer)
2019-12-12 22:10:02 +01:00
{
_image* img = textureImage;
VkFormat format = img->format;
// printf("\nCopy Create RT\n");
// printf("baseLayer %u\n", baseLayer);
// printf("baseMip %u\n", baseMip);
// printf("width %u\n", width);
// printf("height %u\n", height);
//we can't render to an ETC1 texture, so we'll just stick with RGBA8 for now
if(img->format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK)
{
format = VK_FORMAT_R8G8B8A8_UNORM;
}
2019-12-12 22:10:02 +01:00
VkImageViewCreateInfo view = {};
view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
view.viewType = VK_IMAGE_VIEW_TYPE_2D;
view.format = format;
view.components.a = VK_COMPONENT_SWIZZLE_A;
view.components.b = VK_COMPONENT_SWIZZLE_B;
view.components.g = VK_COMPONENT_SWIZZLE_G;
view.components.r = VK_COMPONENT_SWIZZLE_R;
view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
2020-03-02 22:50:08 +01:00
view.subresourceRange.baseMipLevel = baseMip;
view.subresourceRange.baseArrayLayer = baseLayer;
2019-12-12 22:10:02 +01:00
view.subresourceRange.layerCount = 1;
view.subresourceRange.levelCount = 1;
view.image = textureImage;
rpi_vkCreateImageView(device, &view, 0, textureView);
VkAttachmentDescription attachmentDescription = {};
attachmentDescription.format = format;
attachmentDescription.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachmentDescription.finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
attachmentDescription.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachmentDescription.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachmentDescription.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachmentDescription.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescription.samples = VK_SAMPLE_COUNT_1_BIT;
VkAttachmentReference colorReference = { 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL };
VkSubpassDescription subpassDescription = {};
subpassDescription.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpassDescription.colorAttachmentCount = 1;
subpassDescription.pColorAttachments = &colorReference;
VkSubpassDependency dependencies[2];
dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL;
dependencies[0].dstSubpass = 0;
dependencies[0].srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
dependencies[0].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependencies[0].srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
dependencies[0].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
dependencies[1].srcSubpass = 0;
dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL;
dependencies[1].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependencies[1].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
dependencies[1].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dependencies[1].dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
VkRenderPassCreateInfo renderPassInfo = {};
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
renderPassInfo.attachmentCount = 1;
renderPassInfo.pAttachments = &attachmentDescription;
renderPassInfo.subpassCount = 1;
renderPassInfo.pSubpasses = &subpassDescription;
renderPassInfo.dependencyCount = 2;
renderPassInfo.pDependencies = dependencies;
rpi_vkCreateRenderPass(device, &renderPassInfo, 0, offscreenRenderPass);
VkImageView attachments = *textureView;
VkFramebufferCreateInfo framebufferCreateInfo = {};
framebufferCreateInfo.renderPass = *offscreenRenderPass;
framebufferCreateInfo.attachmentCount = 1;
framebufferCreateInfo.pAttachments = &attachments;
framebufferCreateInfo.width = width;
framebufferCreateInfo.height = height;
framebufferCreateInfo.layers = 1;
rpi_vkCreateFramebuffer(device, &framebufferCreateInfo, 0, offscreenFramebuffer);
}
2020-03-02 22:50:08 +01:00
void createPipeline(VkDevice device, uint32_t needTexcoords, uint32_t numVertUniforms, uint32_t numFragUniforms, VkShaderModule blitShaderModule, VkDescriptorSetLayout blitDsl, VkPipelineLayout* blitPipelineLayout, VkRenderPass offscreenRenderPass, VkPipeline* blitPipeline)
2019-12-12 22:10:02 +01:00
{
VkVertexInputBindingDescription vertexInputBindingDescription =
{
0,
2020-03-02 22:50:08 +01:00
sizeof(float) * 2 * 2,
2019-12-12 22:10:02 +01:00
VK_VERTEX_INPUT_RATE_VERTEX
};
2020-03-02 22:50:08 +01:00
VkVertexInputAttributeDescription vertexInputAttributeDescription[2];
if(!needTexcoords)
2019-12-12 22:10:02 +01:00
{
2020-03-02 22:50:08 +01:00
vertexInputAttributeDescription[0].binding = 0;
vertexInputAttributeDescription[0].location = 0;
vertexInputAttributeDescription[0].offset = 0;
vertexInputAttributeDescription[0].format = VK_FORMAT_R32G32_SFLOAT;
}
else
{
vertexInputAttributeDescription[0].binding = 0;
vertexInputAttributeDescription[0].location = 0;
vertexInputAttributeDescription[0].offset = 0;
vertexInputAttributeDescription[0].format = VK_FORMAT_R32G32_SFLOAT;
vertexInputAttributeDescription[1].binding = 0;
2020-04-10 19:23:50 +02:00
vertexInputAttributeDescription[1].location = 1;
2020-03-02 22:50:08 +01:00
vertexInputAttributeDescription[1].offset = sizeof(float) * 2;
vertexInputAttributeDescription[1].format = VK_FORMAT_R32G32_SFLOAT;
}
2019-12-12 22:10:02 +01:00
VkPipelineVertexInputStateCreateInfo vertexInputInfo = {};
vertexInputInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
2020-03-02 22:50:08 +01:00
vertexInputInfo.vertexAttributeDescriptionCount = needTexcoords ? 2 : 1;
vertexInputInfo.pVertexAttributeDescriptions = vertexInputAttributeDescription;
2019-12-12 22:10:02 +01:00
vertexInputInfo.vertexBindingDescriptionCount = 1;
vertexInputInfo.pVertexBindingDescriptions = &vertexInputBindingDescription;
VkPipelineInputAssemblyStateCreateInfo pipelineIACreateInfo = {};
pipelineIACreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
pipelineIACreateInfo.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
VkPipelineRasterizationStateCreateInfo rastCreateInfo = {};
rastCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rastCreateInfo.polygonMode = VK_POLYGON_MODE_FILL;
rastCreateInfo.cullMode = VK_CULL_MODE_NONE;
rastCreateInfo.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
rastCreateInfo.lineWidth = 1.0f;
VkPipelineMultisampleStateCreateInfo pipelineMSCreateInfo = {};
pipelineMSCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
VkPipelineColorBlendAttachmentState blendAttachState = {};
blendAttachState.colorWriteMask = 0xf;
blendAttachState.blendEnable = false;
VkPipelineColorBlendStateCreateInfo blendCreateInfo = {};
blendCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
blendCreateInfo.attachmentCount = 1;
blendCreateInfo.pAttachments = &blendAttachState;
VkPipelineDepthStencilStateCreateInfo depthStencilState = {};
depthStencilState.depthTestEnable = false;
depthStencilState.stencilTestEnable = false;
//create blit pipeline
VkPushConstantRange pushConstantRanges[2];
pushConstantRanges[0].offset = 0;
2020-03-02 22:50:08 +01:00
pushConstantRanges[0].size = numVertUniforms * 4; //n * 32bits
2019-12-12 22:10:02 +01:00
pushConstantRanges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
pushConstantRanges[1].offset = 0;
2020-03-02 22:50:08 +01:00
pushConstantRanges[1].size = numFragUniforms * 4; //n * 32bits
2019-12-12 22:10:02 +01:00
pushConstantRanges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
VkPipelineShaderStageCreateInfo shaderStageCreateInfo[2] = {};
shaderStageCreateInfo[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shaderStageCreateInfo[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
shaderStageCreateInfo[0].module = blitShaderModule;
shaderStageCreateInfo[0].pName = "main";
shaderStageCreateInfo[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shaderStageCreateInfo[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
shaderStageCreateInfo[1].module = blitShaderModule;
shaderStageCreateInfo[1].pName = "main";
VkPipelineLayoutCreateInfo pipelineLayoutCI = {};
pipelineLayoutCI.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
pipelineLayoutCI.setLayoutCount = 1;
pipelineLayoutCI.pSetLayouts = &blitDsl;
pipelineLayoutCI.pushConstantRangeCount = 2;
pipelineLayoutCI.pPushConstantRanges = &pushConstantRanges[0];
rpi_vkCreatePipelineLayout(device, &pipelineLayoutCI, 0, blitPipelineLayout);
VkDynamicState dynState = VK_DYNAMIC_STATE_VIEWPORT;
VkPipelineDynamicStateCreateInfo pdsci = {};
pdsci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
pdsci.dynamicStateCount = 1;
pdsci.pDynamicStates = &dynState;
VkPipelineViewportStateCreateInfo pvsci = {};
pvsci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
pvsci.viewportCount = 0;
pvsci.scissorCount = 0;
VkGraphicsPipelineCreateInfo pipelineInfo = {};
pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
pipelineInfo.stageCount = 2;
pipelineInfo.pStages = &shaderStageCreateInfo[0];
pipelineInfo.pVertexInputState = &vertexInputInfo;
pipelineInfo.pInputAssemblyState = &pipelineIACreateInfo;
pipelineInfo.pViewportState = &pvsci;
pipelineInfo.pDynamicState = &pdsci;
pipelineInfo.pRasterizationState = &rastCreateInfo;
pipelineInfo.pMultisampleState = &pipelineMSCreateInfo;
pipelineInfo.pColorBlendState = &blendCreateInfo;
pipelineInfo.renderPass = offscreenRenderPass;
pipelineInfo.basePipelineIndex = -1;
pipelineInfo.pDepthStencilState = &depthStencilState;
pipelineInfo.layout = *blitPipelineLayout;
VkResult res = rpi_vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, NULL, blitPipeline);
}
2020-03-02 22:50:08 +01:00
void createBufferToTextureShaderModule(VkDevice device, VkShaderModule* blitShaderModule)
2019-12-12 22:10:02 +01:00
{
char vs_asm_code[] =
///0x40000000 = 2.0
///uni = 1.0
///rb0 = 2 - 1 = 1
"sig_small_imm ; rx0 = fsub.ws.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0) ;\n"
///set up VPM read for subsequent reads
///0x00201a00: 0000 0000 0010 0000 0001 1010 0000 0000
///addr: 0
///size: 32bit
///packed
///horizontal
///stride=1
///vectors to read = 2 (how many components)
"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n"
///uni = viewportXScale
///r0 = vpm * uni
"sig_none ; nop = nop(r0, r0, vpm_read, uni) ; r0 = fmul.always(a, b) ;\n"
///r1 = r0 * rb0 (1)
"sig_none ; nop = nop(r0, r0, nop, rb0) ; r1 = fmul.always(r0, b) ;\n"
///uni = viewportYScale
///ra0.16a = int(r1), r2 = vpm * uni
"sig_none ; rx0.16a = ftoi.always(r1, r1, vpm_read, uni) ; r2 = fmul.always(a, b) ;\n"
///r3 = r2 * rb0
"sig_none ; nop = nop(r0, r0, nop, rb0) ; r3 = fmul.always(r2, b) ;\n"
///ra0.16b = int(r3)
"sig_none ; rx0.16b = ftoi.always(r3, r3) ; nop = nop(r0, r0) ;\n"
///set up VPM write for subsequent writes
///0x00001a00: 0000 0000 0000 0000 0001 1010 0000 0000
///addr: 0
///size: 32bit
///horizontal
///stride = 1
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
///shaded vertex format for PSE
/// Ys and Xs
///vpm = ra0
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0);\n"
/// Zs
///uni = 0.5
///vpm = uni
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0);\n"
/// 1.0 / Wc
///vpm = rb0 (1)
"sig_none ; vpm = or.always(b, b, nop, rb0) ; nop = nop(r0, r0);\n"
///END
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"\0";
char cs_asm_code[] =
///uni = 1.0
///r3 = 2.0 - uni
"sig_small_imm ; r3 = fsub.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0);\n"
"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n"
///r2 = vpm
"sig_none ; r2 = or.always(a, a, vpm_read, nop) ; nop = nop(r0, r0);\n"
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
///shaded coordinates format for PTB
/// write Xc
///r1 = vpm, vpm = r2
"sig_none ; r1 = or.always(a, a, vpm_read, nop) ; vpm = v8min.always(r2, r2);\n"
/// write Yc
///uni = viewportXscale
///vpm = r1, r2 = r2 * uni
"sig_none ; vpm = or.always(r1, r1, uni, nop) ; r2 = fmul.always(r2, a);\n"
///uni = viewportYscale
///r1 = r1 * uni
"sig_none ; nop = nop(r0, r0, uni, nop) ; r1 = fmul.always(r1, a);\n"
///r0 = r2 * r3
"sig_none ; nop = nop(r0, r0) ; r0 = fmul.always(r2, r3);\n"
///ra0.16a = r0, r1 = r1 * r3
"sig_none ; rx0.16a = ftoi.always(r0, r0) ; r1 = fmul.always(r1, r3) ;\n"
///ra0.16b = r1
"sig_none ; rx0.16b = ftoi.always(r1, r1) ; nop = nop(r0, r0) ;\n"
///write Zc
///vpm = 0
"sig_small_imm ; vpm = or.always(b, b, nop, 0) ; nop = nop(r0, r0) ;\n"
///write Wc
///vpm = 1.0
"sig_small_imm ; vpm = or.always(b, b, nop, 0x3f800000) ; nop = nop(r0, r0) ;\n"
///write Ys and Xs
///vpm = ra0
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0) ;\n"
///write Zs
///uni = 0.5
///vpm = uni
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;\n"
///write 1/Wc
///vpm = r3
"sig_none ; vpm = or.always(r3, r3) ; nop = nop(r0, r0) ;\n"
///END
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"\0";
//clever: use small immedate -1 interpreted as 0xffffffff (white) to set color to white
//"sig_small_imm ; tlb_color_all = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;"
//8bit access
//abcd
//BGRA
/**
"General-memory lookups are performed by writing to just the s parameter, using the absolute memory
address. In this case no uniform is read. General-memory lookups always return a 32-bit value, and the bottom
two bits of the address are ignored."
/**/
//blit buffer to texture (generic buffer read)
char blit_fs_asm_code[] =
///"sig_load_imm ; r2 = load32.always(0x44f00000) ; nop = load32() ;" //width = 1920.0
"sig_none ; r2 = or.always(b, b, nop, uni) ; nop = nop(r0, r0) ;" //width
2019-12-12 22:10:02 +01:00
"sig_none ; r1 = itof.always(b, b, x_pix, y_pix) ; nop = nop(r0, r0) ;" //FragCoord Y
"sig_none ; r0 = itof.always(a, a, x_pix, y_pix) ; r1 = fmul.always(r1, r2) ;" //FragCoord X, r1 = Y * width
"sig_none ; r0 = fadd.always(r0, r1) ; r0 = nop(r0, r0) ;" //r0 = Y * width + X
"sig_none ; r0 = nop(r0, r0, nop, uni) ; r0 = fmul.always(r0, b) ;" //r0 = (Y * width + X) * pixelBpp
"sig_small_imm ; nop = nop(r0, r0, nop, 0x3e000000) ; r0 = fmul.always(r0, b) ;" //r0 = ((Y * width + X) * pixelBpp) / 8
2019-12-12 22:10:02 +01:00
"sig_none ; r0 = ftoi.always(r0, r0) ; nop = nop(r0, r0) ;" //convert to integer
///write general mem access address
///first argument must be clamped to [0...bufsize-4]
///eg must do min(max(x,0), uni)
///second argument must be a uniform (containing base address, which is 0)
///writing tmu0_s signals that all coordinates are written
"sig_small_imm ; r0 = max.always(r0, b, nop, 0) ; nop = nop(r0, r0) ;" //clamp general access
"sig_none ; r0 = min.always(r0, b, nop, uni) ; nop = nop(r0, r0) ;" //uni = width * height * pixelBytes - pixelBytes
"sig_none ; tmu0_s = add.always(r0, b, nop, uni) ; nop = nop(r0, r0) ;" //uni = 0
2019-12-12 22:10:02 +01:00
///suspend thread (after 2 nops) to wait for TMU request to finish
"sig_thread_switch ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
///read TMU0 request result to R4
"sig_load_tmu0 ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
///when thread has been awakened, MOV from R4 to R0
"sig_none ; r0 = fmax.pm.always.8a(r4, r4) ; nop = nop(r0, r0) ;"
"sig_none ; r1 = fmax.pm.always.8b(r4, r4) ; r0.8a = v8min.always(r0, r0) ;"
"sig_none ; r2 = fmax.pm.always.8c(r4, r4) ; r0.8b = v8min.always(r1, r1) ;"
"sig_none ; r3 = fmax.pm.always.8d(r4, r4) ; r0.8c = v8min.always(r2, r2) ;"
"sig_none ; nop = nop.pm(r0, r0) ; r0.8d = v8min.always(r3, r3) ;"
"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_unlock_score ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"\0";
char* blit_asm_strings[] =
{
(char*)cs_asm_code, (char*)vs_asm_code, (char*)blit_fs_asm_code, 0
};
VkRpiAssemblyMappingEXT vertexMappings[] = {
2019-12-12 22:10:02 +01:00
//vertex shader uniforms
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
4, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
8, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
12, //resource offset
},
};
VkRpiAssemblyMappingEXT fragmentMappings[] = {
//fragment shader uniforms
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_DESCRIPTOR,
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
},
2019-12-12 22:10:02 +01:00
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
4, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
2019-12-12 22:10:02 +01:00
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
8, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
12, //resource offset
2019-12-12 22:10:02 +01:00
}
};
uint32_t spirv[6];
uint64_t* asm_ptrs[4] = {};
uint32_t asm_sizes[4] = {};
VkRpiAssemblyMappingEXT* asm_mappings[4] = {};
uint32_t asm_mappings_sizes[4] = {};
2019-12-12 22:10:02 +01:00
VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {};
shaderModuleCreateInfo.instructions = asm_ptrs;
shaderModuleCreateInfo.numInstructions = asm_sizes;
shaderModuleCreateInfo.mappings = asm_mappings;
shaderModuleCreateInfo.numMappings = asm_mappings_sizes;
asm_mappings[VK_RPI_ASSEMBLY_TYPE_VERTEX] = vertexMappings;
asm_mappings_sizes[VK_RPI_ASSEMBLY_TYPE_VERTEX] = sizeof(vertexMappings) / sizeof(VkRpiAssemblyMappingEXT);
asm_mappings[VK_RPI_ASSEMBLY_TYPE_FRAGMENT] = fragmentMappings;
asm_mappings_sizes[VK_RPI_ASSEMBLY_TYPE_FRAGMENT] = sizeof(fragmentMappings) / sizeof(VkRpiAssemblyMappingEXT);
2019-12-12 22:10:02 +01:00
//TODO use allocator
2019-12-12 22:10:02 +01:00
{ //assemble cs code
asm_sizes[0] = get_num_instructions(cs_asm_code);
uint32_t size = sizeof(uint64_t)*asm_sizes[0];
//TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
//why not map kernel space mem to user space instead?
asm_ptrs[0] = (uint64_t*)malloc(size);
assemble_qpu_asm(cs_asm_code, asm_ptrs[0]);
}
{ //assemble vs code
asm_sizes[1] = get_num_instructions(vs_asm_code);
uint32_t size = sizeof(uint64_t)*asm_sizes[1];
//TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
//why not map kernel space mem to user space instead?
asm_ptrs[1] = (uint64_t*)malloc(size);
assemble_qpu_asm(vs_asm_code, asm_ptrs[1]);
}
2019-12-12 22:10:02 +01:00
{ //assemble fs code
asm_sizes[2] = get_num_instructions(blit_fs_asm_code);
uint32_t size = sizeof(uint64_t)*asm_sizes[2];
//TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
//why not map kernel space mem to user space instead?
asm_ptrs[2] = (uint64_t*)malloc(size);
assemble_qpu_asm(blit_fs_asm_code, asm_ptrs[2]);
}
spirv[0] = 0x07230203;
spirv[1] = 0x00010000;
spirv[2] = 0x14E45250;
spirv[3] = 1;
spirv[4] = (uint32_t)&shaderModuleCreateInfo;
//words start here
spirv[5] = 1 << 16;
VkShaderModuleCreateInfo smci = {};
smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
smci.codeSize = sizeof(uint32_t)*6;
smci.pCode = spirv;
rpi_vkCreateShaderModule(device, &smci, 0, blitShaderModule);
2019-12-12 22:10:02 +01:00
assert(blitShaderModule);
for(uint32_t c = 0; c < 4; ++c)
{
free(asm_ptrs[c]);
}
2019-12-12 22:10:02 +01:00
}
2020-03-02 22:50:08 +01:00
void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitShaderModule)
{
char vs_asm_code[] =
///0x40000000 = 2.0
///uni = 1.0
///rb0 = 2 - 1 = 1
"sig_small_imm ; rx0 = fsub.ws.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0) ;\n"
///set up VPM read for subsequent reads
///0x00201a00: 0000 0000 0010 0000 0001 1010 0000 0000
///addr: 0
///size: 32bit
///packed
///horizontal
///stride=1
2020-04-26 21:21:18 +02:00
///vectors to read = 4
2020-03-02 22:50:08 +01:00
"sig_load_imm ; vr_setup = load32.always(0x00401a00) ; nop = load32.always() ;\n"
///uni = viewportXScale
///r0 = vpm * uni
"sig_none ; nop = nop(r0, r0, vpm_read, uni) ; r0 = fmul.always(a, b) ;\n"
///r1 = r0 * rb0 (1)
"sig_none ; nop = nop(r0, r0, nop, rb0) ; r1 = fmul.always(r0, b) ;\n"
///uni = viewportYScale
///ra0.16a = int(r1), r2 = vpm * uni
"sig_none ; rx0.16a = ftoi.always(r1, r1, vpm_read, uni) ; r2 = fmul.always(a, b) ;\n"
///r3 = r2 * rb0
///r0 = vpm
"sig_none ; r0 = or.always(a, a, vpm_read, rb0) ; r3 = fmul.always(r2, b) ;\n"
///ra0.16b = int(r3)
///r1 = vpm
"sig_none ; rx0.16b = ftoi.always(r3, r3, vpm_read, nop) ; r1 = v8min.always(a, a) ;\n"
///set up VPM write for subsequent writes
///0x00001a00: 0000 0000 0000 0000 0001 1010 0000 0000
///addr: 0
///size: 32bit
///horizontal
///stride = 1
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
///shaded vertex format for PSE
/// Ys and Xs
///vpm = ra0
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0);\n"
/// Zs
///uni = 0.5
///vpm = uni
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0);\n"
/// 1.0 / Wc
///vpm = rb0 (1)
"sig_none ; vpm = or.always(b, b, nop, rb0) ; nop = nop(r0, r0);\n"
///vpm = r0
"sig_none ; vpm = or.always(r0, r0) ; nop = nop(r0, r0);\n"
///vpm = r1
"sig_none ; vpm = or.always(r1, r1) ; nop = nop(r0, r0);\n"
///END
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"\0";
char cs_asm_code[] =
///uni = 1.0
///r3 = 2.0 - uni
"sig_small_imm ; r3 = fsub.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0);\n"
"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n"
///r2 = vpm
"sig_none ; r2 = or.always(a, a, vpm_read, nop) ; nop = nop(r0, r0);\n"
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
///shaded coordinates format for PTB
/// write Xc
///r1 = vpm, vpm = r2
"sig_none ; r1 = or.always(a, a, vpm_read, nop) ; vpm = v8min.always(r2, r2);\n"
/// write Yc
///uni = viewportXscale
///vpm = r1, r2 = r2 * uni
"sig_none ; vpm = or.always(r1, r1, uni, nop) ; r2 = fmul.always(r2, a);\n"
///uni = viewportYscale
///r1 = r1 * uni
"sig_none ; nop = nop(r0, r0, uni, nop) ; r1 = fmul.always(r1, a);\n"
///r0 = r2 * r3
"sig_none ; nop = nop(r0, r0) ; r0 = fmul.always(r2, r3);\n"
///ra0.16a = r0, r1 = r1 * r3
"sig_none ; rx0.16a = ftoi.always(r0, r0) ; r1 = fmul.always(r1, r3) ;\n"
///ra0.16b = r1
"sig_none ; rx0.16b = ftoi.always(r1, r1) ; nop = nop(r0, r0) ;\n"
///write Zc
///vpm = 0
"sig_small_imm ; vpm = or.always(b, b, nop, 0) ; nop = nop(r0, r0) ;\n"
///write Wc
///vpm = 1.0
"sig_small_imm ; vpm = or.always(b, b, nop, 0x3f800000) ; nop = nop(r0, r0) ;\n"
///write Ys and Xs
///vpm = ra0
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0) ;\n"
///write Zs
///uni = 0.5
///vpm = uni
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;\n"
///write 1/Wc
///vpm = r3
"sig_none ; vpm = or.always(r3, r3) ; nop = nop(r0, r0) ;\n"
///END
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"\0";
//sample texture
char sample_fs_asm_code[] =
///r0 = varyingX * W
"sig_none ; nop = nop(r0, r0, pay_zw, vary) ; r0 = fmul.always(a, b) ;"
///r2 = r0 + r5 (C)
///r0 = varyingY * W
"sig_none ; r2 = fadd.always(r0, r5, pay_zw, vary) ; r0 = fmul.always(a, b) ;"
///r3 = r0 + r5 (C)
"sig_none ; r3 = fadd.pm.always(r0, r5, nop, uni) ; r0 = v8min.always(b, b) ;"
2020-03-02 22:50:08 +01:00
///write texture addresses (x, y)
///writing tmu0_s signals that all coordinates are written
"sig_none ; tmu0_b = or.always(r0, r0) ; nop = nop(r0, r0) ;"
2020-03-02 22:50:08 +01:00
"sig_none ; tmu0_t = or.always(r3, r3) ; nop = nop(r0, r0) ;"
"sig_none ; tmu0_s = or.always(r2, r2) ; nop = nop(r0, r0) ;"
///suspend thread (after 2 nops) to wait for TMU request to finish
"sig_thread_switch ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
///read TMU0 request result to R4
"sig_load_tmu0 ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
///when thread has been awakened, MOV from R4 to R0
"sig_none ; r0 = fmax.pm.always.8a(r4, r4) ; nop = nop(r0, r0) ;"
"sig_none ; r1 = fmax.pm.always.8b(r4, r4) ; r0.8a = v8min.always(r0, r0) ;"
"sig_none ; r2 = fmax.pm.always.8c(r4, r4) ; r0.8b = v8min.always(r1, r1) ;"
"sig_none ; r3 = fmax.pm.always.8d(r4, r4) ; r0.8c = v8min.always(r2, r2) ;"
"sig_none ; nop = nop.pm(r0, r0) ; r0.8d = v8min.always(r3, r3) ;"
///"sig_small_imm; r0 = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;"
2020-03-02 22:50:08 +01:00
"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_unlock_score ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"\0";
char* blit_asm_strings[] =
{
(char*)cs_asm_code, (char*)vs_asm_code, (char*)sample_fs_asm_code, 0
};
VkRpiAssemblyMappingEXT vertexMappings[] = {
2020-03-02 22:50:08 +01:00
//vertex shader uniforms
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
4, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
8, //resource offset
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
12, //resource offset
},
};
2020-03-02 22:50:08 +01:00
VkRpiAssemblyMappingEXT fragmentMappings[] = {
2020-03-02 22:50:08 +01:00
//fragment shader uniforms
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
},
2020-03-02 22:50:08 +01:00
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_DESCRIPTOR,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
},
2020-03-02 22:50:08 +01:00
};
uint32_t spirv[6];
uint64_t* asm_ptrs[4] = {};
uint32_t asm_sizes[4] = {};
VkRpiAssemblyMappingEXT* asm_mappings[4] = {};
uint32_t asm_mappings_sizes[4] = {};
2020-03-02 22:50:08 +01:00
VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {};
shaderModuleCreateInfo.instructions = asm_ptrs;
shaderModuleCreateInfo.numInstructions = asm_sizes;
shaderModuleCreateInfo.mappings = asm_mappings;
shaderModuleCreateInfo.numMappings = asm_mappings_sizes;
asm_mappings[VK_RPI_ASSEMBLY_TYPE_VERTEX] = vertexMappings;
asm_mappings_sizes[VK_RPI_ASSEMBLY_TYPE_VERTEX] = sizeof(vertexMappings) / sizeof(VkRpiAssemblyMappingEXT);
asm_mappings[VK_RPI_ASSEMBLY_TYPE_FRAGMENT] = fragmentMappings;
asm_mappings_sizes[VK_RPI_ASSEMBLY_TYPE_FRAGMENT] = sizeof(fragmentMappings) / sizeof(VkRpiAssemblyMappingEXT);
2020-03-02 22:50:08 +01:00
//TODO use allocator
{ //assemble cs code
asm_sizes[0] = get_num_instructions(cs_asm_code);
uint32_t size = sizeof(uint64_t)*asm_sizes[0];
//TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
//why not map kernel space mem to user space instead?
asm_ptrs[0] = (uint64_t*)malloc(size);
assemble_qpu_asm(cs_asm_code, asm_ptrs[0]);
}
{ //assemble vs code
asm_sizes[1] = get_num_instructions(vs_asm_code);
uint32_t size = sizeof(uint64_t)*asm_sizes[1];
//TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
//why not map kernel space mem to user space instead?
asm_ptrs[1] = (uint64_t*)malloc(size);
assemble_qpu_asm(vs_asm_code, asm_ptrs[1]);
}
2020-03-02 22:50:08 +01:00
{ //assemble fs code
asm_sizes[2] = get_num_instructions(sample_fs_asm_code);
uint32_t size = sizeof(uint64_t)*asm_sizes[2];
//TODO this alloc feels kinda useless, we just copy the data anyway to kernel space
//why not map kernel space mem to user space instead?
asm_ptrs[2] = (uint64_t*)malloc(size);
assemble_qpu_asm(sample_fs_asm_code, asm_ptrs[2]);
}
spirv[0] = 0x07230203;
spirv[1] = 0x00010000;
spirv[2] = 0x14E45250;
spirv[3] = 1;
spirv[4] = (uint32_t)&shaderModuleCreateInfo;
//words start here
spirv[5] = 1 << 16;
VkShaderModuleCreateInfo smci = {};
smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
smci.codeSize = sizeof(uint32_t)*6;
smci.pCode = spirv;
rpi_vkCreateShaderModule(device, &smci, 0, blitShaderModule);
2020-03-02 22:50:08 +01:00
assert(blitShaderModule);
for(uint32_t c = 0; c < 4; ++c)
{
free(asm_ptrs[c]);
}
2020-03-02 22:50:08 +01:00
}
void setupEmulationResources(VkDevice device)
{
//create resources that won't change
_device* dev = device;
createFullscreenQuad(device, &dev->emulFsqVertexBuffer, &dev->emulFsqVertexBufferMemory);
2020-03-02 22:50:08 +01:00
createDescriptorPool(device, &dev->emulDescriptorPool);
createDescriptorSetLayouts(device, &dev->emulBufferDsl, &dev->emulTextureDsl);
createSampler(device, &dev->emulNearestTextureSampler, &dev->emulLinearTextureSampler);
createBufferToTextureShaderModule(device, &dev->emulBufferToTextureShaderModule);
createTextureToTextureShaderModule(device, &dev->emulTextureToTextureShaderModule);
}
2019-09-30 00:52:21 +02:00
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyBufferToImage(
2019-04-22 15:58:27 +02:00
VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkBufferImageCopy* pRegions)
{
PROFILESTART(rpi_vkCmdCopyBufferToImage);
2019-12-12 22:10:02 +01:00
_commandBuffer* cmdBuf = commandBuffer;
_device* device = cmdBuf->dev;
_buffer* buf = srcBuffer;
_image* img = dstImage;
for(uint32_t c = 0; c < regionCount; ++c)
{
//TODO support this
assert(!pRegions[c].bufferRowLength);
assert(!pRegions[c].bufferImageHeight);
uint32_t width = pRegions[c].imageExtent.width, height = pRegions[c].imageExtent.height;
uint32_t pixelBpp = getFormatBpp(img->format);
2019-12-12 22:10:02 +01:00
VkBufferView texelBufferView;
VkBufferViewCreateInfo bvci = {};
bvci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
bvci.buffer = buf;
bvci.format = img->format;
bvci.offset = pRegions[c].bufferOffset;
bvci.range = (width * height * pixelBpp) >> 3;
2019-12-12 22:10:02 +01:00
rpi_vkCreateBufferView(device, &bvci, 0, &texelBufferView);
VkDescriptorSet blitDescriptorSet;
VkImageView textureView;
VkRenderPass offscreenRenderPass;
VkFramebuffer offscreenFramebuffer;
VkPipeline blitPipeline;
VkPipelineLayout blitPipelineLayout;
2020-03-02 22:50:08 +01:00
//create blit descriptor set
VkDescriptorSetAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
allocInfo.descriptorPool = device->emulDescriptorPool;
allocInfo.descriptorSetCount = 1;
allocInfo.pSetLayouts = &device->emulBufferDsl;
rpi_vkAllocateDescriptorSets(device, &allocInfo, &blitDescriptorSet);
VkWriteDescriptorSet writeDescriptorSet = {};
writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeDescriptorSet.dstSet = blitDescriptorSet;
writeDescriptorSet.dstBinding = 0;
writeDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
writeDescriptorSet.pTexelBufferView = &texelBufferView;
writeDescriptorSet.descriptorCount = 1;
rpi_vkUpdateDescriptorSets(device, 1, &writeDescriptorSet, 0, 0);
createRendertarget(device, pRegions[c].imageSubresource.baseArrayLayer, pRegions[c].imageSubresource.mipLevel, width, height, img, &textureView, &offscreenRenderPass, &offscreenFramebuffer);
2020-03-02 22:50:08 +01:00
createPipeline(device, 0, 4, 5, device->emulBufferToTextureShaderModule, device->emulBufferDsl, &blitPipelineLayout, offscreenRenderPass, &blitPipeline);
2019-12-12 22:10:02 +01:00
//offscreen rendering
VkClearValue offscreenClearValues =
{
.color = { 1.0f, 0.0f, 1.0f, 1.0f }
};
VkRenderPassBeginInfo renderPassInfo = {};
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
renderPassInfo.renderArea.offset.x = 0;
renderPassInfo.renderArea.offset.y = 0;
renderPassInfo.renderArea.extent.width = width;
renderPassInfo.renderArea.extent.height = height;
renderPassInfo.framebuffer = offscreenFramebuffer;
renderPassInfo.renderPass = offscreenRenderPass;
renderPassInfo.clearValueCount = 1;
renderPassInfo.pClearValues = &offscreenClearValues;
rpi_vkCmdBeginRenderPass(commandBuffer, &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE);
rpi_vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, blitPipeline);
VkViewport vp = {};
vp.x = 0.0f;
vp.y = 0.0f;
vp.width = (float)width;
vp.height = (float)height;
vp.minDepth = 0.0f;
vp.maxDepth = 1.0f;
rpi_vkCmdSetViewport(commandBuffer, 0, 1, &vp);
VkDeviceSize offsets = 0;
rpi_vkCmdBindVertexBuffers(commandBuffer, 0, 1, &device->emulFsqVertexBuffer, &offsets );
2019-12-12 22:10:02 +01:00
rpi_vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, blitPipelineLayout, 0, 1, &blitDescriptorSet, 0, 0);
float Wcoeff = 1.0f; //1.0f / Wc = 2.0 - Wcoeff
float viewportScaleX = (float)(width) * 0.5f * 16.0f;
float viewportScaleY = 1.0f * (float)(height) * 0.5f * 16.0f;
float Zs = 1.0f;
2019-12-12 22:10:02 +01:00
uint32_t vertConstants[4];
vertConstants[0] = *(uint32_t*)&Wcoeff;
vertConstants[1] = *(uint32_t*)&viewportScaleX;
vertConstants[2] = *(uint32_t*)&viewportScaleY;
vertConstants[3] = *(uint32_t*)&Zs;
rpi_vkCmdPushConstants(commandBuffer, blitPipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(vertConstants), &vertConstants);
float w = width;
float bppfloat = pixelBpp;
uint32_t size = ((width * height * pixelBpp) >> 3) - ((pixelBpp > 32 ? pixelBpp : 32) >> 3);
uint32_t fragConstants[4];
fragConstants[0] = *(uint32_t*)&w;
fragConstants[1] = *(uint32_t*)&bppfloat;
fragConstants[2] = size;
fragConstants[3] = pRegions[c].bufferOffset + buf->boundOffset;
2019-12-12 22:10:02 +01:00
rpi_vkCmdPushConstants(commandBuffer, blitPipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(fragConstants), &fragConstants);
rpi_vkCmdDraw(commandBuffer, 6, 1, 0, 0);
rpi_vkCmdEndRenderPass(commandBuffer);
//free up resources
rpi_vkDestroyPipelineLayout(device, blitPipelineLayout, 0);
rpi_vkDestroyPipeline(device, blitPipeline, 0);
rpi_vkFreeDescriptorSets(device, device->emulDescriptorPool, 1, &blitDescriptorSet);
rpi_vkDestroyImageView(device, textureView, 0);
rpi_vkDestroyRenderPass(device, offscreenRenderPass, 0);
rpi_vkDestroyFramebuffer(device, offscreenFramebuffer, 0);
2019-12-12 22:10:02 +01:00
}
2019-12-14 20:48:30 +01:00
img->layout = dstImageLayout;
PROFILEEND(rpi_vkCmdCopyBufferToImage);
2019-04-22 15:58:27 +02:00
}
2019-09-30 00:52:21 +02:00
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage(
2019-04-22 15:58:27 +02:00
VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageBlit* pRegions,
VkFilter filter)
{
PROFILESTART(rpi_vkCmdBlitImage);
2020-03-02 22:50:08 +01:00
_commandBuffer* cmdBuf = commandBuffer;
_device* device = cmdBuf->dev;
_image* srcImg = srcImage;
_image* dstImg = dstImage;
//TODO handle offsets
2020-03-02 22:50:08 +01:00
for(uint32_t c = 0; c < regionCount; ++c)
{
2020-05-15 22:20:01 +02:00
uint32_t srcWidth = max(pRegions[c].srcOffsets[1].x - pRegions[c].srcOffsets[0].x, 1);
uint32_t srcHeight = max(pRegions[c].srcOffsets[1].y - pRegions[c].srcOffsets[0].y, 1);
uint32_t dstWidth = max(pRegions[c].dstOffsets[1].x - pRegions[c].dstOffsets[0].x, 1);
uint32_t dstHeight = max(pRegions[c].dstOffsets[1].y - pRegions[c].dstOffsets[0].y, 1);
2020-03-02 22:50:08 +01:00
uint32_t srcMipLevel = pRegions[c].srcSubresource.mipLevel;
uint32_t dstMipLevel = pRegions[c].dstSubresource.mipLevel;
uint32_t srcPixelBpp = getFormatBpp(srcImg->format);
uint32_t dstPixelBpp = getFormatBpp(dstImg->format);
VkDescriptorSet blitDescriptorSet;
VkImageView srcTextureView;
VkImageView dstTextureView;
VkRenderPass offscreenRenderPass;
VkFramebuffer offscreenFramebuffer;
VkPipeline blitPipeline;
VkPipelineLayout blitPipelineLayout;
VkSampler mipSampler;
VkSamplerCreateInfo samplerCI = {};
samplerCI.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
samplerCI.magFilter = filter == VK_FILTER_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
samplerCI.minFilter = filter == VK_FILTER_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
2020-04-10 16:41:57 +02:00
samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCI.mipLodBias = 1.0f; //disable auto lod
samplerCI.compareOp = VK_COMPARE_OP_NEVER;
samplerCI.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
rpi_vkCreateSampler(device, &samplerCI, 0, &mipSampler);
_sampler* s = mipSampler;
2020-03-02 22:50:08 +01:00
VkImageViewCreateInfo view = {};
view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
view.viewType = VK_IMAGE_VIEW_TYPE_2D;
view.format = srcImg->format;
view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
view.subresourceRange.baseMipLevel = srcMipLevel;
view.subresourceRange.baseArrayLayer = 0;
view.subresourceRange.layerCount = 1;
2020-04-10 16:04:09 +02:00
view.subresourceRange.levelCount = srcImg->miplevels;
2020-03-02 22:50:08 +01:00
view.image = srcImage;
rpi_vkCreateImageView(device, &view, 0, &srcTextureView);
//create blit descriptor set
VkDescriptorSetAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
allocInfo.descriptorPool = device->emulDescriptorPool;
allocInfo.descriptorSetCount = 1;
allocInfo.pSetLayouts = &device->emulTextureDsl;
rpi_vkAllocateDescriptorSets(device, &allocInfo, &blitDescriptorSet);
VkDescriptorImageInfo imageInfo;
imageInfo.imageView = srcTextureView;
imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
imageInfo.sampler = mipSampler;
2020-03-02 22:50:08 +01:00
VkWriteDescriptorSet writeDescriptorSet = {};
writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeDescriptorSet.dstSet = blitDescriptorSet;
writeDescriptorSet.dstBinding = 0;
writeDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writeDescriptorSet.pImageInfo = &imageInfo;
writeDescriptorSet.descriptorCount = 1;
rpi_vkUpdateDescriptorSets(device, 1, &writeDescriptorSet, 0, 0);
createRendertarget(device, pRegions[c].dstSubresource.baseArrayLayer, dstMipLevel, dstWidth, dstHeight, dstImage, &dstTextureView, &offscreenRenderPass, &offscreenFramebuffer);
createPipeline(device, 1, 4, 2, device->emulTextureToTextureShaderModule, device->emulTextureDsl, &blitPipelineLayout, offscreenRenderPass, &blitPipeline);
2020-03-02 22:50:08 +01:00
//offscreen rendering
VkClearValue offscreenClearValues =
{
.color = { 1.0f, 0.0f, 1.0f, 1.0f }
};
VkRenderPassBeginInfo renderPassInfo = {};
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
renderPassInfo.renderArea.offset.x = 0;
renderPassInfo.renderArea.offset.y = 0;
renderPassInfo.renderArea.extent.width = dstWidth;
renderPassInfo.renderArea.extent.height = dstHeight;
renderPassInfo.framebuffer = offscreenFramebuffer;
renderPassInfo.renderPass = offscreenRenderPass;
renderPassInfo.clearValueCount = 1;
renderPassInfo.pClearValues = &offscreenClearValues;
rpi_vkCmdBeginRenderPass(commandBuffer, &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE);
rpi_vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, blitPipeline);
VkViewport vp = {};
vp.x = 0.0f;
vp.y = 0.0f;
vp.width = (float)dstWidth;
vp.height = (float)dstHeight;
vp.minDepth = 0.0f;
vp.maxDepth = 1.0f;
rpi_vkCmdSetViewport(commandBuffer, 0, 1, &vp);
VkDeviceSize offsets = 0;
rpi_vkCmdBindVertexBuffers(commandBuffer, 0, 1, &device->emulFsqVertexBuffer, &offsets );
rpi_vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, blitPipelineLayout, 0, 1, &blitDescriptorSet, 0, 0);
float Wcoeff = 1.0f; //1.0f / Wc = 2.0 - Wcoeff
float viewportScaleX = (float)(dstWidth) * 0.5f * 16.0f;
float viewportScaleY = 1.0f * (float)(dstHeight) * 0.5f * 16.0f;
float Zs = 1.0f;
2020-03-02 22:50:08 +01:00
uint32_t vertConstants[4];
vertConstants[0] = *(uint32_t*)&Wcoeff;
vertConstants[1] = *(uint32_t*)&viewportScaleX;
vertConstants[2] = *(uint32_t*)&viewportScaleY;
vertConstants[3] = *(uint32_t*)&Zs;
rpi_vkCmdPushConstants(commandBuffer, blitPipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(vertConstants), &vertConstants);
2020-04-10 16:04:09 +02:00
float mipBias = srcMipLevel;
uint32_t fragConstants[1];
2020-04-10 16:04:09 +02:00
fragConstants[0] = *(uint32_t*)&mipBias;
rpi_vkCmdPushConstants(commandBuffer, blitPipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(fragConstants), &fragConstants);
2020-03-02 22:50:08 +01:00
rpi_vkCmdDraw(commandBuffer, 6, 1, 0, 0);
rpi_vkCmdEndRenderPass(commandBuffer);
//free up resources
2020-04-23 19:30:40 +02:00
rpi_vkDestroySampler(device, mipSampler, 0);
2020-03-02 22:50:08 +01:00
rpi_vkDestroyPipelineLayout(device, blitPipelineLayout, 0);
rpi_vkDestroyPipeline(device, blitPipeline, 0);
rpi_vkFreeDescriptorSets(device, device->emulDescriptorPool, 1, &blitDescriptorSet);
rpi_vkDestroyImageView(device, srcTextureView, 0);
rpi_vkDestroyImageView(device, dstTextureView, 0);
rpi_vkDestroyRenderPass(device, offscreenRenderPass, 0);
rpi_vkDestroyFramebuffer(device, offscreenFramebuffer, 0);
}
PROFILEEND(rpi_vkCmdBlitImage);
2019-04-22 15:58:27 +02:00
}
2019-09-30 00:52:21 +02:00
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdResolveImage(
2019-04-22 15:58:27 +02:00
VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageResolve* pRegions)
{
PROFILESTART(rpi_vkCmdResolveImage);
2019-09-08 00:30:52 +02:00
//TODO
PROFILEEND(rpi_vkCmdResolveImage);
2019-04-22 15:58:27 +02:00
}
2019-09-30 00:52:21 +02:00
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyImageToBuffer(
2019-04-22 15:58:27 +02:00
VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkBuffer dstBuffer,
uint32_t regionCount,
const VkBufferImageCopy* pRegions)
{
PROFILESTART(rpi_vkCmdCopyImageToBuffer);
2019-09-08 00:30:52 +02:00
//TODO
PROFILEEND(rpi_vkCmdCopyImageToBuffer);
2019-04-22 15:58:27 +02:00
}
2019-09-30 00:52:21 +02:00
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyImage(
2019-04-22 15:58:27 +02:00
VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageCopy* pRegions)
{
PROFILESTART(rpi_vkCmdCopyImage);
2019-12-12 22:10:02 +01:00
rpi_vkCmdBlitImage(commandBuffer, srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions, VK_FILTER_NEAREST);
PROFILEEND(rpi_vkCmdCopyImage);
2019-04-22 15:58:27 +02:00
}
2019-09-30 00:52:21 +02:00
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyBuffer(
2019-04-22 15:58:27 +02:00
VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkBuffer dstBuffer,
uint32_t regionCount,
const VkBufferCopy* pRegions)
{
PROFILESTART(rpi_vkCmdCopyBuffer);
2019-09-08 00:30:52 +02:00
//TODO
PROFILEEND(rpi_vkCmdCopyImage);
2019-04-22 15:58:27 +02:00
}