1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2024-12-01 13:24:20 +01:00

made some progress with vkcmdblitimage

This commit is contained in:
Unknown 2020-03-02 21:50:08 +00:00
parent 1934acaf6f
commit 126a5e7ac7
3 changed files with 424 additions and 63 deletions

View File

@ -48,6 +48,8 @@ void* consecutivePoolAllocate(ConsecutivePoolAllocator* pa, uint32_t numBlocks)
{
assert(pa->buf);
// fprintf(stderr, "pa->nextFreeBlock %u\n", pa->nextFreeBlock);
if(!pa->nextFreeBlock)
{
return 0; //no free blocks

View File

@ -135,9 +135,14 @@ typedef struct VkDevice_T
VkBuffer emulFsqVertexBuffer;
VkDeviceMemory emulFsqVertexBufferMemory;
VkDescriptorPool emulDescriptorPool;
VkDescriptorSetLayout emulBlitDsl;
VkSampler emulTextureSampler;
VkShaderModule emulBlitShaderModule;
VkDescriptorSetLayout emulBufferDsl;
VkDescriptorSetLayout emulTextureDsl;
VkSampler emulNearestTextureSampler;
VkSampler emulLinearTextureSampler;
VkShaderModule emulBufferToTextureShaderModule;
VkShaderModule emulTextureToTextureShaderModule;
VkShaderModule emulTextureToBufferShaderModule; //TODO
VkShaderModule emulBufferToBufferShaderModule; //TODO
} _device;
typedef struct VkRenderPass_T

View File

@ -40,7 +40,7 @@ void createFullscreenQuad(VkDevice device, VkBuffer* fsqVertexBuffer, VkDeviceMe
VkMemoryRequirements mr;
{ //create fsq vertex buffer
unsigned vboSize = sizeof(float) * 2 * 3 * 2; //2 * 3 x vec2
unsigned vboSize = sizeof(float) * 4 * 3 * 2; //4 * 3 x vec2
VkBufferCreateInfo ci = {};
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
@ -63,13 +63,13 @@ void createFullscreenQuad(VkDevice device, VkBuffer* fsqVertexBuffer, VkDeviceMe
float vertices[] =
{
-1, -1,
1, -1,
1, 1,
-1, -1, 0, 0,
1, -1, 1, 0,
1, 1, 1, 1,
1, 1,
-1, 1,
-1, -1
1, 1, 1, 1,
-1, 1, 0, 1,
-1, -1, 0, 0
};
void* data;
@ -81,20 +81,29 @@ void createFullscreenQuad(VkDevice device, VkBuffer* fsqVertexBuffer, VkDeviceMe
}
}
void createDescriptorPool(VkDevice device, VkDescriptorPool* descriptorPool, VkDescriptorSetLayout* blitDsl)
void createDescriptorPool(VkDevice device, VkDescriptorPool* descriptorPool)
{
VkDescriptorPoolSize descriptorPoolSizes[1];
VkDescriptorPoolSize descriptorPoolSizes[2];
descriptorPoolSizes[0].descriptorCount = 100;
descriptorPoolSizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
descriptorPoolSizes[1].descriptorCount = 100;
descriptorPoolSizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
VkDescriptorPoolCreateInfo descriptorPoolCI = {};
descriptorPoolCI.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
descriptorPoolCI.poolSizeCount = 1;
descriptorPoolCI.poolSizeCount = 2;
descriptorPoolCI.pPoolSizes = descriptorPoolSizes;
descriptorPoolCI.maxSets = 100;
descriptorPoolCI.maxSets = 200;
descriptorPoolCI.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
rpi_vkCreateDescriptorPool(device, &descriptorPoolCI, 0, descriptorPool);
}
void createDescriptorSetLayouts(VkDevice device, VkDescriptorSetLayout* bufferDsl, VkDescriptorSetLayout* textureDsl)
{
assert(device);
assert(bufferDsl);
assert(textureDsl);
//create blit dsl
VkDescriptorSetLayoutBinding setLayoutBinding = {};
@ -108,32 +117,13 @@ void createDescriptorPool(VkDevice device, VkDescriptorPool* descriptorPool, VkD
descriptorLayoutCI.bindingCount = 1;
descriptorLayoutCI.pBindings = &setLayoutBinding;
rpi_vkCreateDescriptorSetLayout(device, &descriptorLayoutCI, 0, blitDsl);
rpi_vkCreateDescriptorSetLayout(device, &descriptorLayoutCI, 0, bufferDsl);
setLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
rpi_vkCreateDescriptorSetLayout(device, &descriptorLayoutCI, 0, textureDsl);
}
void createDescriptorSet(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorSet* blitDescriptorSet, VkDescriptorSetLayout* blitDsl, VkBufferView texelBufferView)
{
//create blit descriptor set
VkDescriptorSetAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
allocInfo.descriptorPool = descriptorPool;
allocInfo.descriptorSetCount = 1;
allocInfo.pSetLayouts = blitDsl;
rpi_vkAllocateDescriptorSets(device, &allocInfo, blitDescriptorSet);
VkWriteDescriptorSet writeDescriptorSet = {};
writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeDescriptorSet.dstSet = *blitDescriptorSet;
writeDescriptorSet.dstBinding = 0;
writeDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
writeDescriptorSet.pTexelBufferView = &texelBufferView;
writeDescriptorSet.descriptorCount = 1;
rpi_vkUpdateDescriptorSets(device, 1, &writeDescriptorSet, 0, 0);
}
void createSampler(VkDevice device, VkSampler* textureSampler)
void createSampler(VkDevice device, VkSampler* nearestTextureSampler, VkSampler* linearTextureSampler)
{
VkSamplerCreateInfo sampler = {};
sampler.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
@ -146,12 +136,16 @@ void createSampler(VkDevice device, VkSampler* textureSampler)
sampler.mipLodBias = 0.0f;
sampler.compareOp = VK_COMPARE_OP_NEVER;
sampler.minLod = 0.0f;
sampler.maxLod = 0.0f;
sampler.maxLod = 999.0f;
sampler.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
rpi_vkCreateSampler(device, &sampler, 0, textureSampler);
rpi_vkCreateSampler(device, &sampler, 0, nearestTextureSampler);
sampler.magFilter = VK_FILTER_LINEAR;
sampler.minFilter = VK_FILTER_LINEAR;
rpi_vkCreateSampler(device, &sampler, 0, linearTextureSampler);
}
void createRendertarget(VkDevice device, uint32_t width, uint32_t height, VkImage textureImage, VkImageView* textureView, VkRenderPass* offscreenRenderPass, VkFramebuffer* offscreenFramebuffer)
void createRendertarget(VkDevice device, uint32_t baseMip, uint32_t width, uint32_t height, VkImage textureImage, VkImageView* textureView, VkRenderPass* offscreenRenderPass, VkFramebuffer* offscreenFramebuffer)
{
_image* img = textureImage;
VkFormat format = img->format;
@ -171,7 +165,7 @@ void createRendertarget(VkDevice device, uint32_t width, uint32_t height, VkImag
view.components.g = VK_COMPONENT_SWIZZLE_G;
view.components.r = VK_COMPONENT_SWIZZLE_R;
view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
view.subresourceRange.baseMipLevel = 0;
view.subresourceRange.baseMipLevel = baseMip;
view.subresourceRange.baseArrayLayer = 0;
view.subresourceRange.layerCount = 1;
view.subresourceRange.levelCount = 1;
@ -236,27 +230,41 @@ void createRendertarget(VkDevice device, uint32_t width, uint32_t height, VkImag
rpi_vkCreateFramebuffer(device, &framebufferCreateInfo, 0, offscreenFramebuffer);
}
void createPipeline(VkDevice device, VkShaderModule blitShaderModule, VkDescriptorSetLayout blitDsl, VkPipelineLayout* blitPipelineLayout, VkRenderPass offscreenRenderPass, VkPipeline* blitPipeline)
void createPipeline(VkDevice device, uint32_t needTexcoords, uint32_t numVertUniforms, uint32_t numFragUniforms, VkShaderModule blitShaderModule, VkDescriptorSetLayout blitDsl, VkPipelineLayout* blitPipelineLayout, VkRenderPass offscreenRenderPass, VkPipeline* blitPipeline)
{
VkVertexInputBindingDescription vertexInputBindingDescription =
{
0,
sizeof(float) * 2,
sizeof(float) * 2 * 2,
VK_VERTEX_INPUT_RATE_VERTEX
};
VkVertexInputAttributeDescription vertexInputAttributeDescription =
VkVertexInputAttributeDescription vertexInputAttributeDescription[2];
if(!needTexcoords)
{
0,
0,
VK_FORMAT_R32G32_SFLOAT,
0
};
vertexInputAttributeDescription[0].binding = 0;
vertexInputAttributeDescription[0].location = 0;
vertexInputAttributeDescription[0].offset = 0;
vertexInputAttributeDescription[0].format = VK_FORMAT_R32G32_SFLOAT;
}
else
{
vertexInputAttributeDescription[0].binding = 0;
vertexInputAttributeDescription[0].location = 0;
vertexInputAttributeDescription[0].offset = 0;
vertexInputAttributeDescription[0].format = VK_FORMAT_R32G32_SFLOAT;
vertexInputAttributeDescription[1].binding = 0;
vertexInputAttributeDescription[1].location = 0;
vertexInputAttributeDescription[1].offset = sizeof(float) * 2;
vertexInputAttributeDescription[1].format = VK_FORMAT_R32G32_SFLOAT;
}
VkPipelineVertexInputStateCreateInfo vertexInputInfo = {};
vertexInputInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
vertexInputInfo.vertexAttributeDescriptionCount = 1;
vertexInputInfo.pVertexAttributeDescriptions = &vertexInputAttributeDescription;
vertexInputInfo.vertexAttributeDescriptionCount = needTexcoords ? 2 : 1;
vertexInputInfo.pVertexAttributeDescriptions = vertexInputAttributeDescription;
vertexInputInfo.vertexBindingDescriptionCount = 1;
vertexInputInfo.pVertexBindingDescriptions = &vertexInputBindingDescription;
@ -290,11 +298,11 @@ void createPipeline(VkDevice device, VkShaderModule blitShaderModule, VkDescript
//create blit pipeline
VkPushConstantRange pushConstantRanges[2];
pushConstantRanges[0].offset = 0;
pushConstantRanges[0].size = 4 * 4; //4 * 32bits
pushConstantRanges[0].size = numVertUniforms * 4; //n * 32bits
pushConstantRanges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
pushConstantRanges[1].offset = 0;
pushConstantRanges[1].size = 5 * 4; //5 * 32bits
pushConstantRanges[1].size = numFragUniforms * 4; //n * 32bits
pushConstantRanges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
VkPipelineShaderStageCreateInfo shaderStageCreateInfo[2] = {};
@ -347,7 +355,7 @@ void createPipeline(VkDevice device, VkShaderModule blitShaderModule, VkDescript
VkResult res = rpi_vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, NULL, blitPipeline);
}
void createShaderModule(VkDevice device, VkShaderModule* blitShaderModule)
void createBufferToTextureShaderModule(VkDevice device, VkShaderModule* blitShaderModule)
{
char vs_asm_code[] =
///0x40000000 = 2.0
@ -602,15 +610,222 @@ void createShaderModule(VkDevice device, VkShaderModule* blitShaderModule)
assert(blitShaderModule);
}
void createTextureToTextureShaderModule(VkDevice device, VkShaderModule* blitShaderModule)
{
char vs_asm_code[] =
///0x40000000 = 2.0
///uni = 1.0
///rb0 = 2 - 1 = 1
"sig_small_imm ; rx0 = fsub.ws.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0) ;\n"
///set up VPM read for subsequent reads
///0x00201a00: 0000 0000 0010 0000 0001 1010 0000 0000
///addr: 0
///size: 32bit
///packed
///horizontal
///stride=1
///vectors to read = 4 (TODO not exactly clear what this means...)
"sig_load_imm ; vr_setup = load32.always(0x00401a00) ; nop = load32.always() ;\n"
///uni = viewportXScale
///r0 = vpm * uni
"sig_none ; nop = nop(r0, r0, vpm_read, uni) ; r0 = fmul.always(a, b) ;\n"
///r1 = r0 * rb0 (1)
"sig_none ; nop = nop(r0, r0, nop, rb0) ; r1 = fmul.always(r0, b) ;\n"
///uni = viewportYScale
///ra0.16a = int(r1), r2 = vpm * uni
"sig_none ; rx0.16a = ftoi.always(r1, r1, vpm_read, uni) ; r2 = fmul.always(a, b) ;\n"
///r3 = r2 * rb0
///r0 = vpm
"sig_none ; r0 = or.always(a, a, vpm_read, rb0) ; r3 = fmul.always(r2, b) ;\n"
///ra0.16b = int(r3)
///r1 = vpm
"sig_none ; rx0.16b = ftoi.always(r3, r3, vpm_read, nop) ; r1 = v8min.always(a, a) ;\n"
///set up VPM write for subsequent writes
///0x00001a00: 0000 0000 0000 0000 0001 1010 0000 0000
///addr: 0
///size: 32bit
///horizontal
///stride = 1
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
///shaded vertex format for PSE
/// Ys and Xs
///vpm = ra0
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0);\n"
/// Zs
///uni = 0.5
///vpm = uni
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0);\n"
/// 1.0 / Wc
///vpm = rb0 (1)
"sig_none ; vpm = or.always(b, b, nop, rb0) ; nop = nop(r0, r0);\n"
///vpm = r0
"sig_none ; vpm = or.always(r0, r0) ; nop = nop(r0, r0);\n"
///vpm = r1
"sig_none ; vpm = or.always(r1, r1) ; nop = nop(r0, r0);\n"
///END
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"\0";
char cs_asm_code[] =
///uni = 1.0
///r3 = 2.0 - uni
"sig_small_imm ; r3 = fsub.always(b, a, uni, 0x40000000) ; nop = nop(r0, r0);\n"
"sig_load_imm ; vr_setup = load32.always(0x00201a00) ; nop = load32.always() ;\n"
///r2 = vpm
"sig_none ; r2 = or.always(a, a, vpm_read, nop) ; nop = nop(r0, r0);\n"
"sig_load_imm ; vw_setup = load32.always.ws(0x00001a00) ; nop = load32.always() ;\n"
///shaded coordinates format for PTB
/// write Xc
///r1 = vpm, vpm = r2
"sig_none ; r1 = or.always(a, a, vpm_read, nop) ; vpm = v8min.always(r2, r2);\n"
/// write Yc
///uni = viewportXscale
///vpm = r1, r2 = r2 * uni
"sig_none ; vpm = or.always(r1, r1, uni, nop) ; r2 = fmul.always(r2, a);\n"
///uni = viewportYscale
///r1 = r1 * uni
"sig_none ; nop = nop(r0, r0, uni, nop) ; r1 = fmul.always(r1, a);\n"
///r0 = r2 * r3
"sig_none ; nop = nop(r0, r0) ; r0 = fmul.always(r2, r3);\n"
///ra0.16a = r0, r1 = r1 * r3
"sig_none ; rx0.16a = ftoi.always(r0, r0) ; r1 = fmul.always(r1, r3) ;\n"
///ra0.16b = r1
"sig_none ; rx0.16b = ftoi.always(r1, r1) ; nop = nop(r0, r0) ;\n"
///write Zc
///vpm = 0
"sig_small_imm ; vpm = or.always(b, b, nop, 0) ; nop = nop(r0, r0) ;\n"
///write Wc
///vpm = 1.0
"sig_small_imm ; vpm = or.always(b, b, nop, 0x3f800000) ; nop = nop(r0, r0) ;\n"
///write Ys and Xs
///vpm = ra0
"sig_none ; vpm = or.always(a, a, ra0, nop) ; nop = nop(r0, r0) ;\n"
///write Zs
///uni = 0.5
///vpm = uni
"sig_none ; vpm = or.always(a, a, uni, nop) ; nop = nop(r0, r0) ;\n"
///write 1/Wc
///vpm = r3
"sig_none ; vpm = or.always(r3, r3) ; nop = nop(r0, r0) ;\n"
///END
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;\n"
"\0";
//sample texture
char sample_fs_asm_code[] =
///r0 = varyingX * W
"sig_none ; nop = nop(r0, r0, pay_zw, vary) ; r0 = fmul.always(a, b) ;"
///r2 = r0 + r5 (C)
///r0 = varyingY * W
"sig_none ; r2 = fadd.always(r0, r5, pay_zw, vary) ; r0 = fmul.always(a, b) ;"
///r3 = r0 + r5 (C)
"sig_none ; r3 = fadd.pm.always(r0, r5) ; nop = nop(r0, r0) ;"
///write texture addresses (x, y)
///writing tmu0_s signals that all coordinates are written
"sig_none ; tmu0_t = or.always(r3, r3) ; nop = nop(r0, r0) ;"
"sig_none ; tmu0_s = or.always(r2, r2) ; nop = nop(r0, r0) ;"
///suspend thread (after 2 nops) to wait for TMU request to finish
"sig_thread_switch ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
///read TMU0 request result to R4
"sig_load_tmu0 ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
///when thread has been awakened, MOV from R4 to R0
"sig_none ; r0 = fmax.pm.always.8a(r4, r4) ; nop = nop(r0, r0) ;"
"sig_none ; r1 = fmax.pm.always.8b(r4, r4) ; r0.8a = v8min.always(r0, r0) ;"
"sig_none ; r2 = fmax.pm.always.8c(r4, r4) ; r0.8b = v8min.always(r1, r1) ;"
"sig_none ; r3 = fmax.pm.always.8d(r4, r4) ; r0.8c = v8min.always(r2, r2) ;"
"sig_none ; nop = nop.pm(r0, r0) ; r0.8d = v8min.always(r3, r3) ;"
"sig_none ; tlb_color_all = or.always(r0, r0) ; nop = nop(r0, r0) ;"
"sig_end ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_none ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"sig_unlock_score ; nop = nop(r0, r0) ; nop = nop(r0, r0) ;"
"\0";
char* blit_asm_strings[] =
{
(char*)cs_asm_code, (char*)vs_asm_code, (char*)sample_fs_asm_code, 0
};
VkRpiAssemblyMappingEXT blit_mappings[] = {
//vertex shader uniforms
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
VK_SHADER_STAGE_VERTEX_BIT
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
4, //resource offset
VK_SHADER_STAGE_VERTEX_BIT
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
8, //resource offset
VK_SHADER_STAGE_VERTEX_BIT
},
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_PUSH_CONSTANT,
VK_DESCRIPTOR_TYPE_MAX_ENUM, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
12, //resource offset
VK_SHADER_STAGE_VERTEX_BIT
},
//fragment shader uniforms
{
VK_RPI_ASSEMBLY_MAPPING_TYPE_DESCRIPTOR,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, //descriptor type
0, //descriptor set #
0, //descriptor binding #
0, //descriptor array element #
0, //resource offset
VK_SHADER_STAGE_FRAGMENT_BIT
}
};
VkRpiShaderModuleAssemblyCreateInfoEXT shaderModuleCreateInfo = {};
shaderModuleCreateInfo.asmStrings = blit_asm_strings;
shaderModuleCreateInfo.mappings = blit_mappings;
shaderModuleCreateInfo.numMappings = sizeof(blit_mappings) / sizeof(VkRpiAssemblyMappingEXT);
shaderModuleCreateInfo.pShaderModule = blitShaderModule;
((_device*)device)->dev->customData = (uintptr_t)&shaderModuleCreateInfo;
VkResult res = rpi_vkCreateShaderModuleFromRpiAssemblyEXT(((_device*)device)->dev);
assert(blitShaderModule);
}
void setupEmulationResources(VkDevice device)
{
//create resources that won't change
_device* dev = device;
createFullscreenQuad(device, &dev->emulFsqVertexBuffer, &dev->emulFsqVertexBufferMemory);
createDescriptorPool(device, &dev->emulDescriptorPool, &dev->emulBlitDsl);
createSampler(device, &dev->emulTextureSampler);
createShaderModule(device, &dev->emulBlitShaderModule);
createDescriptorPool(device, &dev->emulDescriptorPool);
createDescriptorSetLayouts(device, &dev->emulBufferDsl, &dev->emulTextureDsl);
createSampler(device, &dev->emulNearestTextureSampler, &dev->emulLinearTextureSampler);
createBufferToTextureShaderModule(device, &dev->emulBufferToTextureShaderModule);
createTextureToTextureShaderModule(device, &dev->emulTextureToTextureShaderModule);
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyBufferToImage(
@ -645,7 +860,6 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyBufferToImage(
bvci.range = (width * height * pixelBpp) >> 3;
rpi_vkCreateBufferView(device, &bvci, 0, &texelBufferView);
VkDescriptorSet blitDescriptorSet;
VkImageView textureView;
VkRenderPass offscreenRenderPass;
@ -653,9 +867,25 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyBufferToImage(
VkPipeline blitPipeline;
VkPipelineLayout blitPipelineLayout;
createDescriptorSet(device, device->emulDescriptorPool, &blitDescriptorSet, &device->emulBlitDsl, texelBufferView);
createRendertarget(device, width, height, img, &textureView, &offscreenRenderPass, &offscreenFramebuffer);
createPipeline(device, device->emulBlitShaderModule, device->emulBlitDsl, &blitPipelineLayout, offscreenRenderPass, &blitPipeline);
//create blit descriptor set
VkDescriptorSetAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
allocInfo.descriptorPool = device->emulDescriptorPool;
allocInfo.descriptorSetCount = 1;
allocInfo.pSetLayouts = &device->emulBufferDsl;
rpi_vkAllocateDescriptorSets(device, &allocInfo, &blitDescriptorSet);
VkWriteDescriptorSet writeDescriptorSet = {};
writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeDescriptorSet.dstSet = blitDescriptorSet;
writeDescriptorSet.dstBinding = 0;
writeDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
writeDescriptorSet.pTexelBufferView = &texelBufferView;
writeDescriptorSet.descriptorCount = 1;
rpi_vkUpdateDescriptorSets(device, 1, &writeDescriptorSet, 0, 0);
createRendertarget(device, pRegions[c].imageSubresource.mipLevel, width, height, img, &textureView, &offscreenRenderPass, &offscreenFramebuffer);
createPipeline(device, 0, 4, 5, device->emulBufferToTextureShaderModule, device->emulBufferDsl, &blitPipelineLayout, offscreenRenderPass, &blitPipeline);
//offscreen rendering
VkClearValue offscreenClearValues =
@ -743,7 +973,131 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBlitImage(
const VkImageBlit* pRegions,
VkFilter filter)
{
//TODO
_commandBuffer* cmdBuf = commandBuffer;
_device* device = cmdBuf->dev;
_image* srcImg = srcImage;
_image* dstImg = dstImage;
for(uint32_t c = 0; c < regionCount; ++c)
{
uint32_t srcWidth = pRegions[c].srcOffsets[1].x - pRegions[c].srcOffsets[0].x;
uint32_t srcHeight = pRegions[c].srcOffsets[1].y - pRegions[c].srcOffsets[0].y;
uint32_t dstWidth = pRegions[c].dstOffsets[1].x - pRegions[c].dstOffsets[0].x;
uint32_t dstHeight = pRegions[c].dstOffsets[1].y - pRegions[c].dstOffsets[0].y;
uint32_t srcMipLevel = pRegions[c].srcSubresource.mipLevel;
uint32_t dstMipLevel = pRegions[c].dstSubresource.mipLevel;
uint32_t srcPixelBpp = getFormatBpp(srcImg->format);
uint32_t dstPixelBpp = getFormatBpp(dstImg->format);
VkDescriptorSet blitDescriptorSet;
VkImageView srcTextureView;
VkImageView dstTextureView;
VkRenderPass offscreenRenderPass;
VkFramebuffer offscreenFramebuffer;
VkPipeline blitPipeline;
VkPipelineLayout blitPipelineLayout;
VkImageViewCreateInfo view = {};
view.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
view.viewType = VK_IMAGE_VIEW_TYPE_2D;
view.format = srcImg->format;
view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
view.subresourceRange.baseMipLevel = srcMipLevel;
view.subresourceRange.baseArrayLayer = 0;
view.subresourceRange.layerCount = 1;
view.subresourceRange.levelCount = 1;
view.image = srcImage;
rpi_vkCreateImageView(device, &view, 0, &srcTextureView);
//TODO this crashes somehow
//create blit descriptor set
VkDescriptorSetAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
allocInfo.descriptorPool = device->emulDescriptorPool;
allocInfo.descriptorSetCount = 1;
allocInfo.pSetLayouts = &device->emulTextureDsl;
rpi_vkAllocateDescriptorSets(device, &allocInfo, &blitDescriptorSet);
VkDescriptorImageInfo imageInfo;
imageInfo.imageView = srcTextureView;
imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
imageInfo.sampler = filter == VK_FILTER_LINEAR ? device->emulLinearTextureSampler : device->emulNearestTextureSampler;
VkWriteDescriptorSet writeDescriptorSet = {};
writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writeDescriptorSet.dstSet = blitDescriptorSet;
writeDescriptorSet.dstBinding = 0;
writeDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writeDescriptorSet.pImageInfo = &imageInfo;
writeDescriptorSet.descriptorCount = 1;
rpi_vkUpdateDescriptorSets(device, 1, &writeDescriptorSet, 0, 0);
createRendertarget(device, dstMipLevel, dstWidth, dstHeight, dstImage, &dstTextureView, &offscreenRenderPass, &offscreenFramebuffer);
createPipeline(device, 1, 4, 1, device->emulTextureToTextureShaderModule, device->emulTextureDsl, &blitPipelineLayout, offscreenRenderPass, &blitPipeline);
//offscreen rendering
VkClearValue offscreenClearValues =
{
.color = { 1.0f, 0.0f, 1.0f, 1.0f }
};
VkRenderPassBeginInfo renderPassInfo = {};
renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
renderPassInfo.renderArea.offset.x = 0;
renderPassInfo.renderArea.offset.y = 0;
renderPassInfo.renderArea.extent.width = dstWidth;
renderPassInfo.renderArea.extent.height = dstHeight;
renderPassInfo.framebuffer = offscreenFramebuffer;
renderPassInfo.renderPass = offscreenRenderPass;
renderPassInfo.clearValueCount = 1;
renderPassInfo.pClearValues = &offscreenClearValues;
rpi_vkCmdBeginRenderPass(commandBuffer, &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE);
rpi_vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, blitPipeline);
VkViewport vp = {};
vp.x = 0.0f;
vp.y = 0.0f;
vp.width = (float)dstWidth;
vp.height = (float)dstHeight;
vp.minDepth = 0.0f;
vp.maxDepth = 1.0f;
rpi_vkCmdSetViewport(commandBuffer, 0, 1, &vp);
VkDeviceSize offsets = 0;
rpi_vkCmdBindVertexBuffers(commandBuffer, 0, 1, &device->emulFsqVertexBuffer, &offsets );
rpi_vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, blitPipelineLayout, 0, 1, &blitDescriptorSet, 0, 0);
float Wcoeff = 1.0f; //1.0f / Wc = 2.0 - Wcoeff
float viewportScaleX = (float)(dstWidth) * 0.5f * 16.0f;
float viewportScaleY = -1.0f * (float)(dstHeight) * 0.5f * 16.0f;
float Zs = 0.5f;
uint32_t vertConstants[4];
vertConstants[0] = *(uint32_t*)&Wcoeff;
vertConstants[1] = *(uint32_t*)&viewportScaleX;
vertConstants[2] = *(uint32_t*)&viewportScaleY;
vertConstants[3] = *(uint32_t*)&Zs;
rpi_vkCmdPushConstants(commandBuffer, blitPipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(vertConstants), &vertConstants);
rpi_vkCmdDraw(commandBuffer, 6, 1, 0, 0);
rpi_vkCmdEndRenderPass(commandBuffer);
//free up resources
rpi_vkDestroyPipelineLayout(device, blitPipelineLayout, 0);
rpi_vkDestroyPipeline(device, blitPipeline, 0);
rpi_vkFreeDescriptorSets(device, device->emulDescriptorPool, 1, &blitDescriptorSet);
rpi_vkDestroyImageView(device, srcTextureView, 0);
rpi_vkDestroyImageView(device, dstTextureView, 0);
rpi_vkDestroyRenderPass(device, offscreenRenderPass, 0);
rpi_vkDestroyFramebuffer(device, offscreenFramebuffer, 0);
}
}
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdResolveImage(