1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2024-12-12 00:08:54 +01:00
rpi-vk-driver/driver/copy.c
2019-12-12 21:10:02 +00:00

723 lines
28 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "common.h"
#include "declarations.h"
#include "vkExtFunctions.h"
uint32_t getMemoryTypeIndex(VkPhysicalDeviceMemoryProperties deviceMemoryProperties, uint32_t typeBits, VkMemoryPropertyFlags properties)
{
// Iterate over all memory types available for the device used in this example
for (uint32_t i = 0; i < deviceMemoryProperties.memoryTypeCount; i++)
{
if ((typeBits & 1) == 1)
{
if ((deviceMemoryProperties.memoryTypes[i].propertyFlags & properties) == properties)
{
return i;
}
}
typeBits >>= 1;
}
assert(0);
}
void createFullscreenQuad(VkDevice device, VkBuffer* fsqVertexBuffer, VkDeviceMemory* fsqVertexBufferMemory)
{
VkMemoryRequirements mr;
{ //create fsq vertex buffer
unsigned vboSize = sizeof(float) * 2 * 3 * 2; //2 * 3 x vec2
VkBufferCreateInfo ci = {};
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
ci.size = vboSize;
ci.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
VkResult res = rpi_vkCreateBuffer(device, &ci, 0, fsqVertexBuffer);
rpi_vkGetBufferMemoryRequirements(device, *fsqVertexBuffer, &mr);
VkPhysicalDeviceMemoryProperties pdmp;
rpi_vkGetPhysicalDeviceMemoryProperties(((_device*)device)->dev, &pdmp);
VkMemoryAllocateInfo mai = {};
mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
mai.allocationSize = mr.size;
mai.memoryTypeIndex = getMemoryTypeIndex(pdmp, mr.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
res = rpi_vkAllocateMemory(device, &mai, 0, fsqVertexBufferMemory);
float vertices[] =
{
-1, -1,
1, -1,
1, 1,
1, 1,
-1, 1,
-1, -1
};
void* data;
res = rpi_vkMapMemory(device, *fsqVertexBufferMemory, 0, mr.size, 0, &data);
memcpy(data, vertices, vboSize);
rpi_vkUnmapMemory(device, *fsqVertexBufferMemory);
res = rpi_vkBindBufferMemory(device, *fsqVertexBuffer, *fsqVertexBufferMemory, 0);
}
}
void createDescriptorSet(VkDevice device, VkDescriptorPool* descriptorPool, VkDescriptorSet* blitDescriptorSet, VkDescriptorSetLayout* blitDsl, VkBufferView texelBufferView)
{
//create blit dsl
VkDescriptorSetLayoutBinding setLayoutBinding = {};
setLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
setLayoutBinding.binding = 0;
setLayoutBinding.descriptorCount = 1;
setLayoutBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
VkDescriptorSetLayoutCreateInfo descriptorLayoutCI = {};
descriptorLayoutCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
descriptorLayoutCI.bindingCount = 1;
descriptorLayoutCI.pBindings = &setLayoutBinding;
rpi_vkCreateDescriptorSetLayout(device, &descriptorLayoutCI, 0, blitDsl);
VkDescriptorPoolSize descriptorPoolSizes[1];
descriptorPoolSizes[0].descriptorCount = 1;
descriptorPoolSizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
VkDescriptorPoolCreateInfo descriptorPoolCI = {};
descriptorPoolCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
descriptorPoolCI.poolSizeCount = 1;
descriptorPoolCI.pPoolSizes = descriptorPoolSizes;
descriptorPoolCI.maxSets = 1;
rpi_vkCreateDescriptorPool(device, &descriptorPoolCI, 0, descriptorPool);
//create blit descriptor set
VkDescriptorSetAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
allocInfo.descriptorPool = *descriptorPool;
allocInfo.descriptorSetCount = 1;
allocInfo.pSetLayouts = blitDsl;
rpi_vkAllocateDescriptorSets(device, &allocInfo, blitDescriptorSet);
VkWriteDescriptorSet writeDescriptorSet = {};
writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeDescriptorSet.dstSet = *blitDescriptorSet;
writeDescriptorSet.dstBinding = 0;
writeDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
writeDescriptorSet.pTexelBufferView = &texelBufferView;
writeDescriptorSet.descriptorCount = 1;
rpi_vkUpdateDescriptorSets(device, 1, &writeDescriptorSet, 0, 0);
}
void createRendertarget(VkDevice device, uint32_t width, uint32_t height, VkImage textureImage, VkImageView* textureView, VkSampler* textureSampler, VkRenderPass* offscreenRenderPass, VkFramebuffer* offscreenFramebuffer)
{
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM;
VkImageViewCreateInfo view = {};
view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
view.viewType = VK_IMAGE_VIEW_TYPE_2D;
view.format = format;
view.components.a = VK_COMPONENT_SWIZZLE_A;
view.components.b = VK_COMPONENT_SWIZZLE_B;
view.components.g = VK_COMPONENT_SWIZZLE_G;
view.components.r = VK_COMPONENT_SWIZZLE_R;
view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
view.subresourceRange.baseMipLevel = 0;
view.subresourceRange.baseArrayLayer = 0;
view.subresourceRange.layerCount = 1;
view.subresourceRange.levelCount = 1;
view.image = textureImage;
rpi_vkCreateImageView(device, &view, 0, textureView);
VkSamplerCreateInfo sampler = {};
sampler.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
sampler.magFilter = VK_FILTER_NEAREST;
sampler.minFilter = VK_FILTER_NEAREST;
sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
sampler.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
sampler.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
sampler.mipLodBias = 0.0f;
sampler.compareOp = VK_COMPARE_OP_NEVER;
sampler.minLod = 0.0f;
sampler.maxLod = 0.0f;
sampler.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
rpi_vkCreateSampler(device, &sampler, 0, textureSampler);
VkAttachmentDescription attachmentDescription = {};
attachmentDescription.format = format;
attachmentDescription.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachmentDescription.finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
attachmentDescription.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachmentDescription.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachmentDescription.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachmentDescription.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescription.samples = VK_SAMPLE_COUNT_1_BIT;
VkAttachmentReference colorReference = { 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL };
VkSubpassDescription subpassDescription = {};
subpassDescription.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpassDescription.colorAttachmentCount = 1;
subpassDescription.pColorAttachments = &colorReference;
VkSubpassDependency dependencies[2];
dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL;
dependencies[0].dstSubpass = 0;
dependencies[0].srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
dependencies[0].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependencies[0].srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
dependencies[0].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
dependencies[1].srcSubpass = 0;
dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL;
dependencies[1].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependencies[1].dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
dependencies[1].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dependencies[1].dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
VkRenderPassCreateInfo renderPassInfo = {};
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
renderPassInfo.attachmentCount = 1;
renderPassInfo.pAttachments = &attachmentDescription;
renderPassInfo.subpassCount = 1;
renderPassInfo.pSubpasses = &subpassDescription;
renderPassInfo.dependencyCount = 2;
renderPassInfo.pDependencies = dependencies;
rpi_vkCreateRenderPass(device, &renderPassInfo, 0, offscreenRenderPass);
VkImageView attachments = *textureView;
VkFramebufferCreateInfo framebufferCreateInfo = {};
framebufferCreateInfo.renderPass = *offscreenRenderPass;
framebufferCreateInfo.attachmentCount = 1;
framebufferCreateInfo.pAttachments = &attachments;
framebufferCreateInfo.width = width;
framebufferCreateInfo.height = height;
framebufferCreateInfo.layers = 1;
rpi_vkCreateFramebuffer(device, &framebufferCreateInfo, 0, offscreenFramebuffer);
}
void createPipeline(VkDevice device, VkShaderModule blitShaderModule, VkDescriptorSetLayout blitDsl, VkPipelineLayout* blitPipelineLayout, VkRenderPass offscreenRenderPass, VkPipeline* blitPipeline)
{
VkVertexInputBindingDescription vertexInputBindingDescription =
{
0,
sizeof(float) * 2,
VK_VERTEX_INPUT_RATE_VERTEX
};
VkVertexInputAttributeDescription vertexInputAttributeDescription =
{
0,
0,
VK_FORMAT_R32G32_SFLOAT,
0
};
VkPipelineVertexInputStateCreateInfo vertexInputInfo = {};
vertexInputInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
vertexInputInfo.vertexAttributeDescriptionCount = 1;
vertexInputInfo.pVertexAttributeDescriptions = &vertexInputAttributeDescription;
vertexInputInfo.vertexBindingDescriptionCount = 1;
vertexInputInfo.pVertexBindingDescriptions = &vertexInputBindingDescription;
VkPipelineInputAssemblyStateCreateInfo pipelineIACreateInfo = {};
pipelineIACreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
pipelineIACreateInfo.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
VkPipelineRasterizationStateCreateInfo rastCreateInfo = {};
rastCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rastCreateInfo.polygonMode = VK_POLYGON_MODE_FILL;
rastCreateInfo.cullMode = VK_CULL_MODE_NONE;
rastCreateInfo.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
rastCreateInfo.lineWidth = 1.0f;
VkPipelineMultisampleStateCreateInfo pipelineMSCreateInfo = {};
pipelineMSCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
VkPipelineColorBlendAttachmentState blendAttachState = {};
blendAttachState.colorWriteMask = 0xf;
blendAttachState.blendEnable = false;
VkPipelineColorBlendStateCreateInfo blendCreateInfo = {};
blendCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
blendCreateInfo.attachmentCount = 1;
blendCreateInfo.pAttachments = &blendAttachState;
VkPipelineDepthStencilStateCreateInfo depthStencilState = {};
depthStencilState.depthTestEnable = false;
depthStencilState.stencilTestEnable = false;
//create blit pipeline
VkPushConstantRange pushConstantRanges[2];
pushConstantRanges[0].offset = 0;
pushConstantRanges[0].size = 4 * 4; //4 * 32bits
pushConstantRanges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
pushConstantRanges[1].offset = 0;
pushConstantRanges[1].size = 3 * 4; //3 * 32bits
pushConstantRanges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
VkPipelineShaderStageCreateInfo shaderStageCreateInfo[2] = {};
shaderStageCreateInfo[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shaderStageCreateInfo[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
shaderStageCreateInfo[0].module = blitShaderModule;
shaderStageCreateInfo[0].pName = "main";
shaderStageCreateInfo[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shaderStageCreateInfo[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
shaderStageCreateInfo[1].module = blitShaderModule;
shaderStageCreateInfo[1].pName = "main";
VkPipelineLayoutCreateInfo pipelineLayoutCI = {};
pipelineLayoutCI.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
pipelineLayoutCI.setLayoutCount = 1;
pipelineLayoutCI.pSetLayouts = &blitDsl;
pipelineLayoutCI.pushConstantRangeCount = 2;
pipelineLayoutCI.pPushConstantRanges = &pushConstantRanges[0];
rpi_vkCreatePipelineLayout(device, &pipelineLayoutCI, 0, blitPipelineLayout);
VkDynamicState dynState = VK_DYNAMIC_STATE_VIEWPORT;
VkPipelineDynamicStateCreateInfo pdsci = {};
pdsci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
pdsci.dynamicStateCount = 1;
pdsci.pDynamicStates = &dynState;
VkPipelineViewportStateCreateInfo pvsci = {};
pvsci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
pvsci.viewportCount = 0;
pvsci.scissorCount = 0;
VkGraphicsPipelineCreateInfo pipelineInfo = {};
pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
pipelineInfo.stageCount = 2;
pipelineInfo.pStages = &shaderStageCreateInfo[0];
pipelineInfo.pVertexInputState = &vertexInputInfo;
pipelineInfo.pInputAssemblyState = &pipelineIACreateInfo;
pipelineInfo.pViewportState = &pvsci;
pipelineInfo.pDynamicState = &pdsci;
pipelineInfo.pRasterizationState = &rastCreateInfo;
pipelineInfo.pMultisampleState = &pipelineMSCreateInfo;
pipelineInfo.pColorBlendState = &blendCreateInfo;
pipelineInfo.renderPass = offscreenRenderPass;
pipelineInfo.basePipelineIndex = -1;
pipelineInfo.pDepthStencilState = &depthStencilState;
pipelineInfo.layout = *blitPipelineLayout;
VkResult res = rpi_vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, NULL, blitPipeline);
}
void createShaderModule(VkDevice device, VkShaderModule* blitShaderModule)
{
char vs_asm_code[] =
///0x40000000 = 2.0
///uni = 1.0
///rb0 = 2 - 1 = 1
"sig_small_imm ; rx0 = fsub.ws.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0) ;\n"
///set up VPM read for subsequent reads
///0x00201a00: 0000 0000 0010 0000 0001 1010 0000 0000
///addr: 0
///size: 32bit
///packed
///horizontal
///stride=1
///vectors to read = 2 (how many components)
"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n"
///uni = viewportXScale
///r0 = vpm * uni
"sig_none ; nop = nop(r0, r0, vpm_read, uni) ; r0 = fmul.always(a, b) ;\n"
///r1 = r0 * rb0 (1)
"sig_none ; nop = nop(r0, r0, nop, rb0) ; r1 = fmul.always(r0, b) ;\n"
///uni = viewportYScale
///ra0.16a = int(r1), r2 = vpm * uni
"sig_none ; rx0.16a = ftoi.always(r1, r1, vpm_read, uni) ; r2 = fmul.always(a, b) ;\n"
///r3 = r2 * rb0
"sig_none ; nop = nop(r0, r0, nop, rb0) ; r3 = fmul.always(r2, b) ;\n"
///ra0.16b = int(r3)
"sig_none ; rx0.16b = ftoi.always(r3, r3) ; nop = nop(r0, r0) ;\n"
///set up VPM write for subsequent writes
///0x00001a00: 0000 0000 0000 0000 0001 1010 0000 0000
///addr: 0
///size: 32bit
///horizontal
///stride = 1
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
///shaded vertex format for PSE
/// Ys and Xs
///vpm = ra0
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0);\n"
/// Zs
///uni = 0.5
///vpm = uni
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0);\n"
/// 1.0 / Wc
///vpm = rb0 (1)
"sig_none ; vpm = or.always(b, b, nop, rb0) ; nop = nop(r0, r0);\n"
///END
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"\0";
char cs_asm_code[] =
///uni = 1.0
///r3 = 2.0 - uni
"sig_small_imm ; r3 = fsub.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0);\n"
"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n"
///r2 = vpm
"sig_none ; r2 = or.always(a, a, vpm_read, nop) ; nop = nop(r0, r0);\n"
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
///shaded coordinates format for PTB
/// write Xc
///r1 = vpm, vpm = r2
"sig_none ; r1 = or.always(a, a, vpm_read, nop) ; vpm = v8min.always(r2, r2);\n"
/// write Yc
///uni = viewportXscale
///vpm = r1, r2 = r2 * uni
"sig_none ; vpm = or.always(r1, r1, uni, nop) ; r2 = fmul.always(r2, a);\n"
///uni = viewportYscale
///r1 = r1 * uni
"sig_none ; nop = nop(r0, r0, uni, nop) ; r1 = fmul.always(r1, a);\n"
///r0 = r2 * r3
"sig_none ; nop = nop(r0, r0) ; r0 = fmul.always(r2, r3);\n"
///ra0.16a = r0, r1 = r1 * r3
"sig_none ; rx0.16a = ftoi.always(r0, r0) ; r1 = fmul.always(r1, r3) ;\n"
///ra0.16b = r1
"sig_none ; rx0.16b = ftoi.always(r1, r1) ; nop = nop(r0, r0) ;\n"
///write Zc
///vpm = 0
"sig_small_imm ; vpm = or.always(b, b, nop, 0) ; nop = nop(r0, r0) ;\n"
///write Wc
///vpm = 1.0
"sig_small_imm ; vpm = or.always(b, b, nop, 0x3f800000) ; nop = nop(r0, r0) ;\n"
///write Ys and Xs
///vpm = ra0
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0) ;\n"
///write Zs
///uni = 0.5
///vpm = uni
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;\n"
///write 1/Wc
///vpm = r3
"sig_none ; vpm = or.always(r3, r3) ; nop = nop(r0, r0) ;\n"
///END
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"\0";
//clever: use small immedate -1 interpreted as 0xffffffff (white) to set color to white
//"sig_small_imm ; tlb_color_all = or.always(b, b, nop, -1) ; nop = nop(r0, r0) ;"
//8bit access
//abcd
//BGRA
/**
"General-memory lookups are performed by writing to just the s parameter, using the absolute memory
address. In this case no uniform is read. General-memory lookups always return a 32-bit value, and the bottom
two bits of the address are ignored."
/**/
//blit buffer to texture (generic buffer read)
char blit_fs_asm_code[] =
"sig_load_imm ; r2 = load32.always(0x44f00000) ; nop = load32() ;" //width = 1920.0
"sig_none ; r1 = itof.always(b, b, x_pix, y_pix) ; nop = nop(r0, r0) ;" //FragCoord Y
"sig_none ; r0 = itof.always(a, a, x_pix, y_pix) ; r1 = fmul.always(r1, r2) ;" //FragCoord X, r1 = Y * width
"sig_none ; r0 = fadd.always(r0, r1) ; r0 = nop(r0, r0) ;" //r0 = Y * width + X
"sig_small_imm ; r0 = nop(r0, r0, nop, 0x40800000) ; r0 = fmul.always(r0, b) ;" //r0 = (Y * width + X) * 4
"sig_none ; r0 = ftoi.always(r0, r0) ; nop = nop(r0, r0) ;" //convert to integer
///write general mem access address
///first argument must be clamped to [0...bufsize-4]
///eg must do min(max(x,0), uni)
///second argument must be a uniform (containing base address, which is 0)
///writing tmu0_s signals that all coordinates are written
"sig_small_imm ; r0 = max.always(r0, b, nop, 0) ; nop = nop(r0, r0) ;" //clamp general access
"sig_none ; r0 = min.always(r0, b, nop, uni) ; nop = nop(r0, r0) ;" //uni = 1920 * 1080 * 4 - 4
"sig_none ; tmu0_s = add.always(r0, b, nop, uni) ; nop = nop(r0, r0) ;"
///suspend thread (after 2 nops) to wait for TMU request to finish
"sig_thread_switch ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
///read TMU0 request result to R4
"sig_load_tmu0 ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
///when thread has been awakened, MOV from R4 to R0
"sig_none ; r0 = fmax.pm.always.8a(r4, r4) ; nop = nop(r0, r0) ;"
"sig_none ; r1 = fmax.pm.always.8b(r4, r4) ; r0.8a = v8min.always(r0, r0) ;"
"sig_none ; r2 = fmax.pm.always.8c(r4, r4) ; r0.8b = v8min.always(r1, r1) ;"
"sig_none ; r3 = fmax.pm.always.8d(r4, r4) ; r0.8c = v8min.always(r2, r2) ;"
"sig_none ; nop = nop.pm(r0, r0) ; r0.8d = v8min.always(r3, r3) ;"
"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_unlock_score ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"\0";
char* blit_asm_strings[] =
{
(char*)cs_asm_code, (char*)vs_asm_code, (char*)blit_fs_asm_code, 0
};
VkRpiAssemblyMappingEXT blit_mappings[] = {
//vertex shader uniforms
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
VK_SHADER_STAGE_VERTEX_BIT
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
4, //resource offset
VK_SHADER_STAGE_VERTEX_BIT
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
8, //resource offset
VK_SHADER_STAGE_VERTEX_BIT
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
12, //resource offset
VK_SHADER_STAGE_VERTEX_BIT
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
VK_SHADER_STAGE_FRAGMENT_BIT
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
4, //resource offset
VK_SHADER_STAGE_FRAGMENT_BIT
},
//fragment shader uniforms
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_DESCRIPTOR,
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
VK_SHADER_STAGE_FRAGMENT_BIT
}
};
VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {};
shaderModuleCreateInfo.asmStrings = blit_asm_strings;
shaderModuleCreateInfo.mappings = blit_mappings;
shaderModuleCreateInfo.numMappings = sizeof(blit_mappings) / sizeof(VkRpiAssemblyMappingEXT);
shaderModuleCreateInfo.pShaderModule = blitShaderModule;
((_device*)device)->dev->customData = (uintptr_t)&shaderModuleCreateInfo;
//PFN_vkCreateShaderModuleFromRpiAssemblyEXT vkCreateShaderModuleFromRpiAssemblyEXT = (PFN_vkCreateShaderModuleFromRpiAssemblyEXT)vkGetInstanceProcAddr(instance, "vkCreateShaderModuleFromRpiAssemblyEXT");
VkResult res = rpi_vkCreateShaderModuleFromRpiAssemblyEXT(((_device*)device)->dev);
assert(blitShaderModule);
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyBufferToImage(
VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkBufferImageCopy* pRegions)
{
_commandBuffer* cmdBuf = commandBuffer;
_device* device = cmdBuf->dev;
_buffer* buf = srcBuffer;
_image* img = dstImage;
for(uint32_t c = 0; c < regionCount; ++c)
{
//TODO support this
assert(!pRegions[c].bufferRowLength);
assert(!pRegions[c].bufferImageHeight);
uint32_t width = pRegions[c].imageExtent.width, height = pRegions[c].imageExtent.height;
VkBufferView texelBufferView;
VkBufferViewCreateInfo bvci = {};
bvci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
bvci.buffer = buf;
bvci.format = img->format;
bvci.offset = pRegions[c].bufferOffset;
bvci.range = width * height * getFormatBpp(img->format) * 0.25f;
rpi_vkCreateBufferView(device, &bvci, 0, &texelBufferView);
VkBuffer fsqVertexBuffer;
VkDeviceMemory fsqVertexBufferMemory;
VkDescriptorPool descriptorPool;
VkDescriptorSet blitDescriptorSet;
VkDescriptorSetLayout blitDsl;
VkImage textureImage = img;
VkImageView textureView;
VkSampler textureSampler;
VkRenderPass offscreenRenderPass;
VkFramebuffer offscreenFramebuffer;
VkShaderModule blitShaderModule;
VkPipeline blitPipeline;
VkPipelineLayout blitPipelineLayout;
createFullscreenQuad(device, &fsqVertexBuffer, &fsqVertexBufferMemory);
createDescriptorSet(device, &descriptorPool, &blitDescriptorSet, &blitDsl, texelBufferView);
createRendertarget(device, width, height, textureImage, &textureView, &textureSampler, &offscreenRenderPass, &offscreenFramebuffer);
createShaderModule(device, &blitShaderModule);
createPipeline(device, blitShaderModule, blitDsl, &blitPipelineLayout, offscreenRenderPass, &blitPipeline);
//offscreen rendering
VkClearValue offscreenClearValues =
{
.color = { 1.0f, 0.0f, 1.0f, 1.0f }
};
VkRenderPassBeginInfo renderPassInfo = {};
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
renderPassInfo.renderArea.offset.x = 0;
renderPassInfo.renderArea.offset.y = 0;
renderPassInfo.renderArea.extent.width = width;
renderPassInfo.renderArea.extent.height = height;
renderPassInfo.framebuffer = offscreenFramebuffer;
renderPassInfo.renderPass = offscreenRenderPass;
renderPassInfo.clearValueCount = 1;
renderPassInfo.pClearValues = &offscreenClearValues;
rpi_vkCmdBeginRenderPass(commandBuffer, &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE);
rpi_vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, blitPipeline);
VkViewport vp = {};
vp.x = 0.0f;
vp.y = 0.0f;
vp.width = (float)width;
vp.height = (float)height;
vp.minDepth = 0.0f;
vp.maxDepth = 1.0f;
rpi_vkCmdSetViewport(commandBuffer, 0, 1, &vp);
VkDeviceSize offsets = 0;
rpi_vkCmdBindVertexBuffers(commandBuffer, 0, 1, &fsqVertexBuffer, &offsets );
rpi_vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, blitPipelineLayout, 0, 1, &blitDescriptorSet, 0, 0);
float Wcoeff = 1.0f; //1.0f / Wc = 2.0 - Wcoeff
float viewportScaleX = (float)(width) * 0.5f * 16.0f;
float viewportScaleY = -1.0f * (float)(height) * 0.5f * 16.0f;
float Zs = 0.5f;
uint32_t vertConstants[4];
vertConstants[0] = *(uint32_t*)&Wcoeff;
vertConstants[1] = *(uint32_t*)&viewportScaleX;
vertConstants[2] = *(uint32_t*)&viewportScaleY;
vertConstants[3] = *(uint32_t*)&Zs;
rpi_vkCmdPushConstants(commandBuffer, blitPipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(vertConstants), &vertConstants);
uint32_t size = width * height * 4 - 4;//swapChainExtent.width * swapChainExtent.height * 4;
uint32_t fragConstants[2];
fragConstants[0] = size;
fragConstants[1] = 0;
rpi_vkCmdPushConstants(commandBuffer, blitPipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(fragConstants), &fragConstants);
rpi_vkCmdDraw(commandBuffer, 6, 1, 0, 0);
rpi_vkCmdEndRenderPass(commandBuffer);
}
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage(
VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageBlit* pRegions,
VkFilter filter)
{
//TODO
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdResolveImage(
VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageResolve* pRegions)
{
//TODO
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyImageToBuffer(
VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkBuffer dstBuffer,
uint32_t regionCount,
const VkBufferImageCopy* pRegions)
{
//TODO
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyImage(
VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageCopy* pRegions)
{
rpi_vkCmdBlitImage(commandBuffer, srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions, VK_FILTER_NEAREST);
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyBuffer(
VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkBuffer dstBuffer,
uint32_t regionCount,
const VkBufferCopy* pRegions)
{
//TODO
}