From 9323469824cce40278587f88646dbddd4fe55884 Mon Sep 17 00:00:00 2001 From: Unknown <0.tamas.marton@gmail.com> Date: Mon, 4 Jun 2018 14:11:55 +0100 Subject: [PATCH] first test almost working, control list needs fixing --- driver/ConsecutivePoolAllocator.h | 7 +- driver/ControlListUtil.h | 11 +- driver/driver.c | 159 +++++++++-- external/include/kernel/vc4_packet.h | 408 +++++++++++++++++++++++++++ 4 files changed, 556 insertions(+), 29 deletions(-) create mode 100644 external/include/kernel/vc4_packet.h diff --git a/driver/ConsecutivePoolAllocator.h b/driver/ConsecutivePoolAllocator.h index adb3461..dd0007c 100644 --- a/driver/ConsecutivePoolAllocator.h +++ b/driver/ConsecutivePoolAllocator.h @@ -33,10 +33,11 @@ ConsecutivePoolAllocator createConsecutivePoolAllocator(char* b, unsigned bs, un //initialize linked list of free pointers uint32_t* ptr = pa.nextFreeBlock; - for(unsigned c = 0; c < s/bs - 1; ++c) + unsigned last = s/bs - 1; + for(unsigned c = 0; c < last; ++c) { - *ptr = ptr + bs; - ptr += bs; + *ptr = ptr + bs/4; + ptr += bs/4; } *ptr = 0; //last element diff --git a/driver/ControlListUtil.h b/driver/ControlListUtil.h index 2f77fb3..b6dae7d 100644 --- a/driver/ControlListUtil.h +++ b/driver/ControlListUtil.h @@ -30,6 +30,11 @@ static inline void clEmitShaderRelocation(ControlList* cl, const ControlListAddr #include +uint32_t divRoundUp(uint32_t n, uint32_t d) +{ + return (((n) + (d) - 1) / (d)); +} + //move bits to offset, mask rest to 0 uint32_t moveBits(uint32_t d, uint32_t bits, uint32_t offset) { @@ -535,7 +540,11 @@ void clInsertTileBinningModeConfiguration(ControlList* cl, *(uint32_t*)cl->nextFreeByte = tileAllocationMemoryAddress; cl->nextFreeByte += 4; *(uint32_t*)cl->nextFreeByte = tileAllocationMemorySize; cl->nextFreeByte += 4; *(uint32_t*)cl->nextFreeByte = tileStateDataArrayAddress; cl->nextFreeByte += 4; - *(uint32_t*)cl->nextFreeByte = widthInPixels; cl->nextFreeByte += 4; + uint32_t tileSize = multisampleMode4x ? 32 : 64; + uint32_t widthInTiles = divRoundUp(widthInPixels, tileSize); + uint32_t heightInTiles = divRoundUp(heightInPixels, tileSize); + *(uint8_t*)cl->nextFreeByte = widthInTiles; cl->nextFreeByte++; + *(uint8_t*)cl->nextFreeByte = heightInTiles; cl->nextFreeByte++; *cl->nextFreeByte = moveBits(multisampleMode4x, 1, 0) | moveBits(tileBuffer64BitColorDepth, 1, 1) | diff --git a/driver/driver.c b/driver/driver.c index 906b345..d2d551a 100644 --- a/driver/driver.c +++ b/driver/driver.c @@ -19,6 +19,8 @@ #include "ConsecutivePoolAllocator.h" #include "LinearAllocator.h" +#include "kernel/vc4_packet.h" + #ifndef min #define min(a, b) (a < b ? a : b) #endif @@ -44,6 +46,7 @@ typedef struct VkCommandPool_T { PoolAllocator pa; ConsecutivePoolAllocator cpa; + uint32_t queueFamilyIndex; } _commandPool; typedef enum commandBufferState @@ -61,6 +64,8 @@ typedef struct VkCommandBuffer_T //Recorded commands include commands to bind pipelines and descriptor sets to the command buffer, commands to modify dynamic state, commands to draw (for graphics rendering), //commands to dispatch (for compute), commands to execute secondary command buffers (for primary command buffers only), commands to copy buffers and images, and other commands + struct drm_vc4_submit_cl submitCl; + ControlList binCl; ControlList shaderRecCl; uint32_t shaderRecCount; @@ -103,6 +108,16 @@ typedef struct VkSwapchain_T VkSurfaceKHR surface; } _swapchain; +void clFit(VkCommandBuffer cb, ControlList* cl, uint32_t commandSize) +{ + if(!clHasEnoughSpace(cl, commandSize)) + { + uint32_t currSize = clSize(cl); + cl->buffer = consecutivePoolReAllocate(&cb->cp->cpa, cl->buffer, cl->numBlocks); assert(cl->buffer); + cl->nextFreeByte = cl->buffer + currSize; + } +} + /* * https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkEnumerateInstanceExtensionProperties * When pLayerName parameter is NULL, only extensions provided by the Vulkan implementation or by implicitly enabled layers are returned. When pLayerName is the name of a layer, @@ -567,7 +582,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore( { return VK_ERROR_OUT_OF_HOST_MEMORY; } - sem_init(s, 0, 0); + sem_init(s, 0, 0); //create semaphore unsignalled, shared between threads *pSemaphore = (VkSemaphore)s; @@ -753,10 +768,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR( s->images[c].usageBits = pCreateInfo->imageUsage; int res = modeset_create_fb(controlFd, &s->images[c]); assert(res == 0); - - res = modeset_fb_for_dev(controlFd, s->surface, &s->images[c]); assert(res == 0); } + int res = modeset_fb_for_dev(controlFd, s->surface, &s->images[s->backbufferIdx]); assert(res == 0); + return VK_SUCCESS; } @@ -843,6 +858,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool( return VK_ERROR_OUT_OF_HOST_MEMORY; } + cp->queueFamilyIndex = pCreateInfo->queueFamilyIndex; + //initial number of command buffers to hold int numCommandBufs = 100; int controlListSize = ARM_PAGE_SIZE * 100; @@ -975,9 +992,21 @@ VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer( //When a command buffer begins recording, all state in that command buffer is undefined + struct drm_vc4_submit_cl submitCl = + { + .color_read.hindex = ~0, + .zs_read.hindex = ~0, + .color_write.hindex = ~0, + .msaa_color_write.hindex = ~0, + .zs_write.hindex = ~0, + .msaa_zs_write.hindex = ~0, + }; + commandBuffer->usageFlags = pBeginInfo->flags; commandBuffer->shaderRecCount = 0; commandBuffer->state = CMDBUF_STATE_RECORDING; + commandBuffer->submitCl = submitCl; + return VK_SUCCESS; } @@ -1022,6 +1051,23 @@ VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( //TODO } +uint32_t packVec4IntoRGBA8(const float rgba[4]) +{ + uint8_t r, g, b, a; + r = rgba[0] * 255.0; + g = rgba[1] * 255.0; + b = rgba[2] * 255.0; + a = rgba[3] * 255.0; + + uint32_t res = 0 | + (a << 0) | + (b << 8) | + (g << 16) | + (r << 24); + + return res; +} + /* * https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCmdClearColorImage * Color and depth/stencil images can be cleared outside a render pass instance using vkCmdClearColorImage or vkCmdClearDepthStencilImage, respectively. @@ -1036,8 +1082,74 @@ VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage( const VkImageSubresourceRange* pRanges) { assert(commandBuffer); + assert(image); + assert(pColor); - //TODO implement VkImage to be able to clear backbuffer + //TODO in the end this should be a draw call, as this can only be used outside a render pass + //TODO ranges support + + assert(imageLayout == VK_IMAGE_LAYOUT_GENERAL || + imageLayout == VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR || + imageLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + assert(commandBuffer->state == CMDBUF_STATE_RECORDING); + assert(_queueFamilyProperties[commandBuffer->cp->queueFamilyIndex].queueFlags & VK_QUEUE_GRAPHICS_BIT || _queueFamilyProperties[commandBuffer->cp->queueFamilyIndex].queueFlags & VK_QUEUE_COMPUTE_BIT); + + //TODO externally sync cmdbuf, cmdpool + + _image* i = image; + + assert(i->usageBits & VK_IMAGE_USAGE_TRANSFER_DST_BIT); + + clFit(commandBuffer, &commandBuffer->binCl, V3D21_TILE_BINNING_MODE_CONFIGURATION_length); + clInsertTileBinningModeConfiguration(&commandBuffer->binCl, + 0, 0, 0, 0, + 0, //TODO 64bit color + i->samples > 1, //msaa + i->width, i->height, 0, 0, 0); + + //START_TILE_BINNING resets the statechange counters in the hardware, + //which are what is used when a primitive is binned to a tile to + //figure out what new state packets need to be written to that tile's + //command list. + clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length); + clInsertStartTileBinning(&commandBuffer->binCl); + + //Reset the current compressed primitives format. This gets modified + //by VC4_PACKET_GL_INDEXED_PRIMITIVE and + //VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start + //of every tile. + clFit(commandBuffer, &commandBuffer->binCl, V3D21_PRIMITIVE_LIST_FORMAT_length); + clInsertPrimitiveListFormat(&commandBuffer->binCl, + 1, //16 bit + 2); //tris + + clFit(commandBuffer, &commandBuffer->handlesCl, 4); + uint32_t idx = clGetHandleIndex(&commandBuffer->handlesCl, i->handle); + commandBuffer->submitCl.color_write.hindex = idx; + commandBuffer->submitCl.color_write.offset = 0; + commandBuffer->submitCl.color_write.flags = 0; + //TODO format, tiling + commandBuffer->submitCl.color_write.bits = + VC4_SET_FIELD(VC4_RENDER_CONFIG_FORMAT_RGBA8888, VC4_RENDER_CONFIG_FORMAT) | + VC4_SET_FIELD(VC4_TILING_FORMAT_LINEAR, VC4_RENDER_CONFIG_MEMORY_FORMAT); + + //TODO msaa? + + commandBuffer->submitCl.clear_color[0] = + commandBuffer->submitCl.clear_color[1] = packVec4IntoRGBA8(pColor->float32); + //TODO ranges + commandBuffer->submitCl.min_x_tile = 0; + commandBuffer->submitCl.min_y_tile = 0; + commandBuffer->submitCl.max_x_tile = (i->width - 1) / (i->samples > 1 ? 32 : 64); + commandBuffer->submitCl.max_y_tile = (i->height - 1) / (i->samples > 1 ? 32 : 64); + commandBuffer->submitCl.width = i->width; + commandBuffer->submitCl.height = i->height; + commandBuffer->submitCl.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR; + commandBuffer->submitCl.clear_z = 0; //TODO + commandBuffer->submitCl.clear_s = 0; + + //TODO I suppose this should be a submit itself? } /* @@ -1056,8 +1168,9 @@ VKAPI_ATTR VkResult VKAPI_CALL vkEndCommandBuffer( //until the FLUSH completes. //The FLUSH caps all of our bin lists with a //VC4_PACKET_RETURN. - clHasEnoughSpace(&commandBuffer->binCl, V3D21_INCREMENT_SEMAPHORE_length); + clFit(commandBuffer, &commandBuffer->binCl, V3D21_INCREMENT_SEMAPHORE_length); clInsertIncrementSemaphore(&commandBuffer->binCl); + clFit(commandBuffer, &commandBuffer->binCl, V3D21_FLUSH_length); clInsertFlush(&commandBuffer->binCl); commandBuffer->state = CMDBUF_STATE_EXECUTABLE; @@ -1081,8 +1194,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR( assert(semaphore != VK_NULL_HANDLE || fence != VK_NULL_HANDLE); - //TODO is this necessary? - sem_wait((sem_t*)semaphore); + sem_t* s = semaphore; //TODO we need to keep track of currently acquired images? @@ -1091,8 +1203,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR( *pImageIndex = ((_swapchain*)swapchain)->backbufferIdx; //return back buffer index //signal semaphore - int semVal; sem_getvalue((sem_t*)semaphore, &semVal); assert(semVal <= 0); //make sure semaphore is unsignalled - sem_post((sem_t*)semaphore); + int semVal; sem_getvalue(s, &semVal); assert(semVal <= 0); //make sure semaphore is unsignalled + sem_post(s); //TODO signal fence @@ -1142,7 +1254,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( for(int c = 0; c < pSubmits->commandBufferCount; ++c) { - struct drm_vc4_submit_cl submitCl = + /*struct drm_vc4_submit_cl submitCl = { .color_read.hindex = ~0, .zs_read.hindex = ~0, @@ -1150,26 +1262,24 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( .msaa_color_write.hindex = ~0, .zs_write.hindex = ~0, .msaa_zs_write.hindex = ~0, - }; + };*/ - //TODO set rcl flags + VkCommandBuffer cmdbuf = pSubmits->pCommandBuffers[c]; - submitCl.bo_handles = pSubmits->pCommandBuffers[c]->handlesCl.buffer; - submitCl.bo_handle_count = clSize(&pSubmits->pCommandBuffers[c]->handlesCl) / 4; - submitCl.bin_cl = pSubmits->pCommandBuffers[c]->binCl.buffer; - submitCl.bin_cl_size = clSize(&pSubmits->pCommandBuffers[c]->binCl); - submitCl.shader_rec = pSubmits->pCommandBuffers[c]->shaderRecCl.buffer; - submitCl.shader_rec_size = clSize(&pSubmits->pCommandBuffers[c]->shaderRecCl); - submitCl.shader_rec_count = pSubmits->pCommandBuffers[c]->shaderRecCount; - submitCl.uniforms = pSubmits->pCommandBuffers[c]->uniformsCl.buffer; - submitCl.uniforms_size = clSize(&pSubmits->pCommandBuffers[c]->uniformsCl); - - //TODO set draw flags + cmdbuf->submitCl.bo_handles = cmdbuf->handlesCl.buffer; + cmdbuf->submitCl.bo_handle_count = clSize(&cmdbuf->handlesCl) / 4; + cmdbuf->submitCl.bin_cl = cmdbuf->binCl.buffer; + cmdbuf->submitCl.bin_cl_size = clSize(&cmdbuf->binCl); + cmdbuf->submitCl.shader_rec = cmdbuf->shaderRecCl.buffer; + cmdbuf->submitCl.shader_rec_size = clSize(&cmdbuf->shaderRecCl); + cmdbuf->submitCl.shader_rec_count = cmdbuf->shaderRecCount; + cmdbuf->submitCl.uniforms = cmdbuf->uniformsCl.buffer; + cmdbuf->submitCl.uniforms_size = clSize(&cmdbuf->uniformsCl); //submit ioctl uint64_t lastEmitSequno; //TODO uint64_t lastFinishedSequno; - vc4_cl_submit(renderFd, &submitCl, &lastEmitSequno, &lastFinishedSequno); + vc4_cl_submit(renderFd, &cmdbuf->submitCl, &lastEmitSequno, &lastFinishedSequno); } for(int c = 0; c < pSubmits->commandBufferCount; ++c) @@ -1319,7 +1429,6 @@ VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore( //TODO: allocator is ignored for now assert(pAllocator == 0); - sem_wait((sem_t*)semaphore); //must be externally synced sem_destroy((sem_t*)semaphore); } diff --git a/external/include/kernel/vc4_packet.h b/external/include/kernel/vc4_packet.h new file mode 100644 index 0000000..c2e3a51 --- /dev/null +++ b/external/include/kernel/vc4_packet.h @@ -0,0 +1,408 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC4_PACKET_H +#define VC4_PACKET_H + +enum vc4_packet { + VC4_PACKET_HALT = 0, + VC4_PACKET_NOP = 1, + + VC4_PACKET_FLUSH = 4, + VC4_PACKET_FLUSH_ALL = 5, + VC4_PACKET_START_TILE_BINNING = 6, + VC4_PACKET_INCREMENT_SEMAPHORE = 7, + VC4_PACKET_WAIT_ON_SEMAPHORE = 8, + + VC4_PACKET_BRANCH = 16, + VC4_PACKET_BRANCH_TO_SUB_LIST = 17, + + VC4_PACKET_STORE_MS_TILE_BUFFER = 24, + VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25, + VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26, + VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27, + VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28, + VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29, + + VC4_PACKET_GL_INDEXED_PRIMITIVE = 32, + VC4_PACKET_GL_ARRAY_PRIMITIVE = 33, + + VC4_PACKET_COMPRESSED_PRIMITIVE = 48, + VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49, + + VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56, + + VC4_PACKET_GL_SHADER_STATE = 64, + VC4_PACKET_NV_SHADER_STATE = 65, + VC4_PACKET_VG_SHADER_STATE = 66, + + VC4_PACKET_CONFIGURATION_BITS = 96, + VC4_PACKET_FLAT_SHADE_FLAGS = 97, + VC4_PACKET_POINT_SIZE = 98, + VC4_PACKET_LINE_WIDTH = 99, + VC4_PACKET_RHT_X_BOUNDARY = 100, + VC4_PACKET_DEPTH_OFFSET = 101, + VC4_PACKET_CLIP_WINDOW = 102, + VC4_PACKET_VIEWPORT_OFFSET = 103, + VC4_PACKET_Z_CLIPPING = 104, + VC4_PACKET_CLIPPER_XY_SCALING = 105, + VC4_PACKET_CLIPPER_Z_SCALING = 106, + + VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112, + VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113, + VC4_PACKET_CLEAR_COLORS = 114, + VC4_PACKET_TILE_COORDINATES = 115, + + /* Not an actual hardware packet -- this is what we use to put + * references to GEM bos in the command stream, since we need the u32 + * int the actual address packet in order to store the offset from the + * start of the BO. + */ + VC4_PACKET_GEM_HANDLES = 254, +} __attribute__ ((__packed__)); + +#define VC4_PACKET_HALT_SIZE 1 +#define VC4_PACKET_NOP_SIZE 1 +#define VC4_PACKET_FLUSH_SIZE 1 +#define VC4_PACKET_FLUSH_ALL_SIZE 1 +#define VC4_PACKET_START_TILE_BINNING_SIZE 1 +#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1 +#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1 +#define VC4_PACKET_BRANCH_SIZE 5 +#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5 +#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1 +#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1 +#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5 +#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5 +#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7 +#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7 +#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14 +#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10 +#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1 +#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1 +#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2 +#define VC4_PACKET_GL_SHADER_STATE_SIZE 5 +#define VC4_PACKET_NV_SHADER_STATE_SIZE 5 +#define VC4_PACKET_VG_SHADER_STATE_SIZE 5 +#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4 +#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5 +#define VC4_PACKET_POINT_SIZE_SIZE 5 +#define VC4_PACKET_LINE_WIDTH_SIZE 5 +#define VC4_PACKET_RHT_X_BOUNDARY_SIZE 3 +#define VC4_PACKET_DEPTH_OFFSET_SIZE 5 +#define VC4_PACKET_CLIP_WINDOW_SIZE 9 +#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5 +#define VC4_PACKET_Z_CLIPPING_SIZE 9 +#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9 +#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9 +#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16 +#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE 11 +#define VC4_PACKET_CLEAR_COLORS_SIZE 14 +#define VC4_PACKET_TILE_COORDINATES_SIZE 3 +#define VC4_PACKET_GEM_HANDLES_SIZE 9 + +/* Number of multisamples supported. */ +#define VC4_MAX_SAMPLES 4 +/* Size of a full resolution color or Z tile buffer load/store. */ +#define VC4_TILE_BUFFER_SIZE (64 * 64 * 4) + +#define VC4_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low)) +/* Using the GNU statement expression extension */ +#define VC4_SET_FIELD(value, field) \ + ({ \ + uint32_t fieldval = (value) << field ## _SHIFT; \ + assert((fieldval & ~ field ## _MASK) == 0); \ + fieldval & field ## _MASK; \ + }) + +#define VC4_GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) + +/** @{ + * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and + * VC4_PACKET_TILE_RENDERING_MODE_CONFIG. +*/ +#define VC4_TILING_FORMAT_LINEAR 0 +#define VC4_TILING_FORMAT_T 1 +#define VC4_TILING_FORMAT_LT 2 +/** @} */ + +/** @{ + * + * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and + * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER. + */ +#define VC4_LOADSTORE_FULL_RES_EOF (1 << 3) +#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL (1 << 2) +#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1) +#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0) + +/** @{ + * + * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and + * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER. + */ +#define VC4_LOADSTORE_FULL_RES_EOF (1 << 3) +#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL (1 << 2) +#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1) +#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0) + +/** @{ + * + * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address) + */ + +#define VC4_LOADSTORE_TILE_BUFFER_EOF (1 << 3) +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK (1 << 2) +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS (1 << 1) +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR (1 << 0) + +/** @} */ + +/** @{ + * + * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL + */ +#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 15) +#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 14) +#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 13) +#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 12) + +#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8) +#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8 +#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 0 +#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER 1 +#define VC4_LOADSTORE_TILE_BUFFER_BGR565 2 +/** @} */ + +/** @{ + * + * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL + */ +#define VC4_STORE_TILE_BUFFER_MODE_MASK VC4_MASK(7, 6) +#define VC4_STORE_TILE_BUFFER_MODE_SHIFT 6 +#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6) +#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6) +#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6) + +/** The values of the field are VC4_TILING_FORMAT_* */ +#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK VC4_MASK(5, 4) +#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT 4 + +#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK VC4_MASK(2, 0) +#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT 0 +#define VC4_LOADSTORE_TILE_BUFFER_NONE 0 +#define VC4_LOADSTORE_TILE_BUFFER_COLOR 1 +#define VC4_LOADSTORE_TILE_BUFFER_ZS 2 +#define VC4_LOADSTORE_TILE_BUFFER_Z 3 +#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK 4 +#define VC4_LOADSTORE_TILE_BUFFER_FULL 5 +/** @} */ + +#define VC4_INDEX_BUFFER_U8 (0 << 4) +#define VC4_INDEX_BUFFER_U16 (1 << 4) + +/* This flag is only present in NV shader state. */ +#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS (1 << 3) +#define VC4_SHADER_FLAG_ENABLE_CLIPPING (1 << 2) +#define VC4_SHADER_FLAG_VS_POINT_SIZE (1 << 1) +#define VC4_SHADER_FLAG_FS_SINGLE_THREAD (1 << 0) + +/** @{ byte 2 of config bits. */ +#define VC4_CONFIG_BITS_EARLY_Z_UPDATE (1 << 1) +#define VC4_CONFIG_BITS_EARLY_Z (1 << 0) +/** @} */ + +/** @{ byte 1 of config bits. */ +#define VC4_CONFIG_BITS_Z_UPDATE (1 << 7) +/** same values in this 3-bit field as PIPE_FUNC_* */ +#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4 +#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE (1 << 3) + +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1) +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1) +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1) +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1) + +#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT (1 << 0) +/** @} */ + +/** @{ byte 0 of config bits. */ +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6) +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6) +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6) + +#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES (1 << 4) +#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET (1 << 3) +#define VC4_CONFIG_BITS_CW_PRIMITIVES (1 << 2) +#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK (1 << 1) +#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT (1 << 0) +/** @} */ + +/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */ +#define VC4_BIN_CONFIG_DB_NON_MS (1 << 7) + +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5) +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 0 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 1 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 2 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 3 + +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK VC4_MASK(4, 3) +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 0 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 1 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3 + +#define VC4_BIN_CONFIG_AUTO_INIT_TSDA (1 << 2) +#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT (1 << 1) +#define VC4_BIN_CONFIG_MS_MODE_4X (1 << 0) +/** @} */ + +/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */ +#define VC4_RENDER_CONFIG_DB_NON_MS (1 << 12) +#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE (1 << 11) +#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G (1 << 10) +#define VC4_RENDER_CONFIG_COVERAGE_MODE (1 << 9) +#define VC4_RENDER_CONFIG_ENABLE_VG_MASK (1 << 8) + +/** The values of the field are VC4_TILING_FORMAT_* */ +#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6) +#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT 6 + +#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4) +#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4) +#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4) + +#define VC4_RENDER_CONFIG_FORMAT_MASK VC4_MASK(3, 2) +#define VC4_RENDER_CONFIG_FORMAT_SHIFT 2 +#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 0 +#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1 +#define VC4_RENDER_CONFIG_FORMAT_BGR565 2 + +#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT (1 << 1) +#define VC4_RENDER_CONFIG_MS_MODE_4X (1 << 0) + +#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4) +#define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS (0 << 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES (1 << 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES (2 << 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0) + +enum vc4_texture_data_type { + VC4_TEXTURE_TYPE_RGBA8888 = 0, + VC4_TEXTURE_TYPE_RGBX8888 = 1, + VC4_TEXTURE_TYPE_RGBA4444 = 2, + VC4_TEXTURE_TYPE_RGBA5551 = 3, + VC4_TEXTURE_TYPE_RGB565 = 4, + VC4_TEXTURE_TYPE_LUMINANCE = 5, + VC4_TEXTURE_TYPE_ALPHA = 6, + VC4_TEXTURE_TYPE_LUMALPHA = 7, + VC4_TEXTURE_TYPE_ETC1 = 8, + VC4_TEXTURE_TYPE_S16F = 9, + VC4_TEXTURE_TYPE_S8 = 10, + VC4_TEXTURE_TYPE_S16 = 11, + VC4_TEXTURE_TYPE_BW1 = 12, + VC4_TEXTURE_TYPE_A4 = 13, + VC4_TEXTURE_TYPE_A1 = 14, + VC4_TEXTURE_TYPE_RGBA64 = 15, + VC4_TEXTURE_TYPE_RGBA32R = 16, + VC4_TEXTURE_TYPE_YUV422R = 17, +}; + +#define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12) +#define VC4_TEX_P0_OFFSET_SHIFT 12 +#define VC4_TEX_P0_CSWIZ_MASK VC4_MASK(11, 10) +#define VC4_TEX_P0_CSWIZ_SHIFT 10 +#define VC4_TEX_P0_CMMODE_MASK VC4_MASK(9, 9) +#define VC4_TEX_P0_CMMODE_SHIFT 9 +#define VC4_TEX_P0_FLIPY_MASK VC4_MASK(8, 8) +#define VC4_TEX_P0_FLIPY_SHIFT 8 +#define VC4_TEX_P0_TYPE_MASK VC4_MASK(7, 4) +#define VC4_TEX_P0_TYPE_SHIFT 4 +#define VC4_TEX_P0_MIPLVLS_MASK VC4_MASK(3, 0) +#define VC4_TEX_P0_MIPLVLS_SHIFT 0 + +#define VC4_TEX_P1_TYPE4_MASK VC4_MASK(31, 31) +#define VC4_TEX_P1_TYPE4_SHIFT 31 +#define VC4_TEX_P1_HEIGHT_MASK VC4_MASK(30, 20) +#define VC4_TEX_P1_HEIGHT_SHIFT 20 +#define VC4_TEX_P1_ETCFLIP_MASK VC4_MASK(19, 19) +#define VC4_TEX_P1_ETCFLIP_SHIFT 19 +#define VC4_TEX_P1_WIDTH_MASK VC4_MASK(18, 8) +#define VC4_TEX_P1_WIDTH_SHIFT 8 + +#define VC4_TEX_P1_MAGFILT_MASK VC4_MASK(7, 7) +#define VC4_TEX_P1_MAGFILT_SHIFT 7 +# define VC4_TEX_P1_MAGFILT_LINEAR 0 +# define VC4_TEX_P1_MAGFILT_NEAREST 1 + +#define VC4_TEX_P1_MINFILT_MASK VC4_MASK(6, 4) +#define VC4_TEX_P1_MINFILT_SHIFT 4 +# define VC4_TEX_P1_MINFILT_LINEAR 0 +# define VC4_TEX_P1_MINFILT_NEAREST 1 +# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR 2 +# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN 3 +# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR 4 +# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN 5 + +#define VC4_TEX_P1_WRAP_T_MASK VC4_MASK(3, 2) +#define VC4_TEX_P1_WRAP_T_SHIFT 2 +#define VC4_TEX_P1_WRAP_S_MASK VC4_MASK(1, 0) +#define VC4_TEX_P1_WRAP_S_SHIFT 0 +# define VC4_TEX_P1_WRAP_REPEAT 0 +# define VC4_TEX_P1_WRAP_CLAMP 1 +# define VC4_TEX_P1_WRAP_MIRROR 2 +# define VC4_TEX_P1_WRAP_BORDER 3 + +#define VC4_TEX_P2_PTYPE_MASK VC4_MASK(31, 30) +#define VC4_TEX_P2_PTYPE_SHIFT 30 +# define VC4_TEX_P2_PTYPE_IGNORED 0 +# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE 1 +# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS 2 +# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS 3 + +/* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */ +#define VC4_TEX_P2_CMST_MASK VC4_MASK(29, 12) +#define VC4_TEX_P2_CMST_SHIFT 12 +#define VC4_TEX_P2_BSLOD_MASK VC4_MASK(0, 0) +#define VC4_TEX_P2_BSLOD_SHIFT 0 + +/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */ +#define VC4_TEX_P2_CHEIGHT_MASK VC4_MASK(22, 12) +#define VC4_TEX_P2_CHEIGHT_SHIFT 12 +#define VC4_TEX_P2_CWIDTH_MASK VC4_MASK(10, 0) +#define VC4_TEX_P2_CWIDTH_SHIFT 0 + +/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */ +#define VC4_TEX_P2_CYOFF_MASK VC4_MASK(22, 12) +#define VC4_TEX_P2_CYOFF_SHIFT 12 +#define VC4_TEX_P2_CXOFF_MASK VC4_MASK(10, 0) +#define VC4_TEX_P2_CXOFF_SHIFT 0 + +#endif /* VC4_PACKET_H */