1
0
mirror of https://github.com/Yours3lf/rpi-vk-driver.git synced 2025-03-22 13:19:43 +01:00

added caps

This commit is contained in:
Unknown 2018-05-20 20:41:48 +01:00
parent 18203f1a0c
commit 2026f0d565
3 changed files with 380 additions and 328 deletions

View File

@ -44,23 +44,25 @@ uint32_t clSize(ControlList* cl)
return cl->nextFreeByte - cl->buffer;
}
void clAllocateSpace(ControlList* cl, uint32_t size)
uint32_t clHasEnoughSpace(ControlList* cl, uint32_t size)
{
uint32_t currSize = clSize(cl);
if(currSize + size < CONTROL_LIST_SIZE - 5)
if(currSize + size < CONTROL_LIST_SIZE)
{
return; //fits!
return 1; //fits!
}
else
{
return 0; //need to reallocate
}
}
void clInit(ControlList* cl)
void clInit(ControlList* cl, void* buffer)
{
assert(cl);
assert(cl->buffer);
assert(buffer);
cl->buffer = buffer;
cl->numBlocks = 1;
cl->nextFreeByte = &cl->buffer[0];
}
@ -85,6 +87,7 @@ void clInsertNop(ControlList* cl)
void clInsertFlush(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLUSH_opcode;
cl->nextFreeByte++;
@ -93,6 +96,7 @@ void clInsertFlush(ControlList* cl)
void clInsertFlushAllState(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLUSH_ALL_STATE_opcode;
cl->nextFreeByte++;
@ -101,6 +105,7 @@ void clInsertFlushAllState(ControlList* cl)
void clInsertStartTileBinning(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_START_TILE_BINNING_opcode;
cl->nextFreeByte++;
@ -109,6 +114,7 @@ void clInsertStartTileBinning(ControlList* cl)
void clInsertIncrementSemaphore(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_INCREMENT_SEMAPHORE_opcode;
cl->nextFreeByte++;
@ -117,6 +123,7 @@ void clInsertIncrementSemaphore(ControlList* cl)
void clInsertWaitOnSemaphore(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_WAIT_ON_SEMAPHORE_opcode;
cl->nextFreeByte++;
@ -126,6 +133,7 @@ void clInsertWaitOnSemaphore(ControlList* cl)
void clInsertBranch(ControlList* cls, ControlListAddress address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_BRANCH_opcode; cls->nextFreeByte++;
//TODO is this correct?
@ -137,6 +145,7 @@ void clInsertBranch(ControlList* cls, ControlListAddress address)
void clInsertBranchToSubList(ControlList* cls, ControlListAddress address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_BRANCH_TO_SUB_LIST_opcode; cls->nextFreeByte++;
//TODO is this correct?
@ -147,6 +156,7 @@ void clInsertBranchToSubList(ControlList* cls, ControlListAddress address)
void clInsertReturnFromSubList(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_RETURN_FROM_SUB_LIST_opcode;
cl->nextFreeByte++;
@ -155,6 +165,7 @@ void clInsertReturnFromSubList(ControlList* cl)
void clInsertStoreMultiSampleResolvedTileColorBuffer(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_opcode;
cl->nextFreeByte++;
@ -163,6 +174,7 @@ void clInsertStoreMultiSampleResolvedTileColorBuffer(ControlList* cl)
void clInsertStoreMultiSampleResolvedTileColorBufferAndEOF(ControlList* cl)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_AND_EOF_opcode;
cl->nextFreeByte++;
@ -177,6 +189,7 @@ void clInsertStoreFullResolutionTileBuffer(ControlList* cls,
uint32_t disableColorBufferWrite) //0/1
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_STORE_FULL_RESOLUTION_TILE_BUFFER_opcode; cls->nextFreeByte++;
//TODO is this correct?
@ -197,6 +210,7 @@ void clInsertReLoadFullResolutionTileBuffer(ControlList* cls,
uint32_t disableColorBufferRead) //0/1
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_RE_LOAD_FULL_RESOLUTION_TILE_BUFFER_opcode; cls->nextFreeByte++;
//TODO is this correct?
@ -223,6 +237,7 @@ void clInsertStoreTileBufferGeneral(ControlList* cls,
uint32_t bufferToStore) //0/1/2/3/5 none/color/zstencil/z/full
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_STORE_TILE_BUFFER_GENERAL_opcode; cls->nextFreeByte++;
//TODO is this correct?
@ -259,6 +274,7 @@ void clInsertLoadTileBufferGeneral(ControlList* cls,
uint32_t bufferToLoad) //0/1/2/3/5 none/color/zstencil/z/full
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_LOAD_TILE_BUFFER_GENERAL_opcode; cls->nextFreeByte++;
//TODO is this correct?
@ -287,6 +303,7 @@ void clInsertIndexedPrimitiveList(ControlList* cl,
enum V3D21_Primitive primitiveMode)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_INDEXED_PRIMITIVE_LIST_opcode; cl->nextFreeByte++;
*cl->nextFreeByte = moveBits(indexType, 4, 4) | moveBits(primitiveMode, 4, 0); cl->nextFreeByte++;
@ -313,6 +330,7 @@ void clInsertPrimitiveListFormat(ControlList* cl,
uint32_t primitiveType) //0/1/2/3: point/line/tri/rhy
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_PRIMITIVE_LIST_FORMAT_opcode; cl->nextFreeByte++;
*cl->nextFreeByte = moveBits(dataType, 4, 4) | moveBits(primitiveType, 4, 0); cl->nextFreeByte++;
@ -339,6 +357,7 @@ void clInsertClearColors(ControlList* cl,
uint64_t clearColor) //2x RGBA8 or 1x RGBA16
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLEAR_COLORS_opcode; cl->nextFreeByte++;
*(uint64_t*)cl->nextFreeByte = clearColor; cl->nextFreeByte += 8;
@ -362,6 +381,7 @@ void clInsertConfigurationBits(ControlList* cl,
uint32_t enableForwardFacingPrimitive) //0/1
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CONFIGURATION_BITS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte =
@ -384,6 +404,7 @@ void clInsertFlatShadeFlags(ControlList* cl,
uint32_t flags)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_FLAT_SHADE_FLAGS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = flags; cl->nextFreeByte += 4;
@ -393,6 +414,7 @@ void clInsertPointSize(ControlList* cl,
float size)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_POINT_SIZE_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = size; cl->nextFreeByte += 4;
@ -402,6 +424,7 @@ void clInsertLineWidth(ControlList* cl,
float width)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_LINE_WIDTH_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = width; cl->nextFreeByte += 4;
@ -411,6 +434,7 @@ void clInsertRHTXBoundary(ControlList* cl,
uint32_t boundary) //sint16
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_RHT_X_BOUNDARY_opcode; cl->nextFreeByte++;
*(uint16_t*)cl->nextFreeByte = moveBits(boundary, 16, 0); cl->nextFreeByte += 2;
@ -433,6 +457,7 @@ void clInsertClipWindow(ControlList* cl,
uint32_t leftPixelCoord) //uint16
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIP_WINDOW_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = moveBits(leftPixelCoord, 16, 0) | moveBits(bottomPixelCoord, 16, 16); cl->nextFreeByte += 4;
@ -445,6 +470,7 @@ void clInsertViewPortOffset(ControlList* cl,
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_VIEWPORT_OFFSET_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = moveBits(x, 16, 0) | moveBits(y, 16, 16); cl->nextFreeByte += 4;
@ -468,6 +494,7 @@ void clInsertClipperXYScaling(ControlList* cl,
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIPPER_XY_SCALING_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = width; cl->nextFreeByte += 4;
@ -480,6 +507,7 @@ void clInsertClipperZScaleOffset(ControlList* cl,
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_CLIPPER_Z_SCALE_AND_OFFSET_opcode; cl->nextFreeByte++;
*(float*)cl->nextFreeByte = zScale; cl->nextFreeByte += 4;
@ -501,6 +529,7 @@ void clInsertTileBinningModeConfiguration(ControlList* cl,
)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_TILE_BINNING_MODE_CONFIGURATION_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = tileAllocationMemoryAddress; cl->nextFreeByte += 4;
@ -531,6 +560,7 @@ void clInsertTileRenderingModeConfiguration(ControlList* cls,
uint32_t heightPixels)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
*cls->nextFreeByte = V3D21_TILE_RENDERING_MODE_CONFIGURATION_opcode; cls->nextFreeByte++;
//TODO is this correct?
@ -555,6 +585,7 @@ void clInsertTileCoordinates(ControlList* cl,
uint32_t tileRowNumber) //int8
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_TILE_COORDINATES_opcode; cl->nextFreeByte++;
*(uint16_t*)cl->nextFreeByte = moveBits(tileColumnNumber, 8, 0) | moveBits(tileRowNumber, 8, 8); cl->nextFreeByte += 2;
@ -565,6 +596,7 @@ void clInsertGEMRelocations(ControlList* cl,
uint32_t buffer1)
{
assert(cl);
assert(cl->buffer);
assert(cl->nextFreeByte);
*cl->nextFreeByte = V3D21_GEM_RELOCATIONS_opcode; cl->nextFreeByte++;
*(uint32_t*)cl->nextFreeByte = buffer0; cl->nextFreeByte += 4;
@ -587,6 +619,7 @@ void clInsertShaderRecord(ControlList* cls,
ControlListAddress vertexCodeAddress)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
//TODO is this correct?
*cls->nextFreeByte =
@ -618,6 +651,7 @@ void clInsertAttributeRecord(ControlList* cls,
uint32_t vertexVPMOffset)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
uint32_t sizeBytesMinusOne = sizeBytes - 1;
//TODO is this correct?
@ -629,10 +663,34 @@ void clInsertAttributeRecord(ControlList* cls,
cls->nextFreeByte++; //skip coordinate shader stuff
}
/**
 * Looks up `handle` in the handles control list and returns its index,
 * appending the handle to the end of the list first when it is not there yet.
 *
 * The handles control list is treated as a packed array of uint32_t values
 * that starts at `handlesCl->buffer` and ends at `handlesCl->nextFreeByte`.
 *
 * @param handlesCl control list that stores the handles, must be initialised
 * @param handle    handle value to search for / append
 * @return index (in uint32_t units) of `handle` inside the handles list
 *
 * NOTE: appending writes 4 bytes at nextFreeByte without any capacity check;
 * the caller is responsible for making sure the list has room left.
 */
uint32_t clGetHandleIndex(ControlList* handlesCl, uint32_t handle)
{
	//same sanity checks as every other cl* function in this file
	assert(handlesCl);
	assert(handlesCl->buffer);
	assert(handlesCl->nextFreeByte);

	const uint32_t numHandles = clSize(handlesCl) / 4;
	const uint32_t* handles = (const uint32_t*)handlesCl->buffer;

	for(uint32_t c = 0; c < numHandles; ++c)
	{
		if(handles[c] == handle)
		{
			//already recorded, reuse its slot
			return c;
		}
	}

	//not recorded yet: append it, its index is the old element count
	*(uint32_t*)handlesCl->nextFreeByte = handle;
	handlesCl->nextFreeByte += 4;

	return numHandles;
}
//input: 2 cls (cl + handles cl)
static inline void clEmitShaderRelocation(ControlList* cls, const ControlListAddress* address)
{
assert(cls);
assert(cls->buffer);
assert(cls->nextFreeByte);
assert(address);
assert(address->handle);
@ -642,26 +700,8 @@ static inline void clEmitShaderRelocation(ControlList* cls, const ControlListAdd
ControlList* cl = cls;
ControlList* handlesCl = cls + 1;
uint32_t c = 0;
uint32_t numHandles = (handlesCl->nextFreeByte - handlesCl->buffer) / 4;
for(; c < numHandles; ++c)
{
if(((uint32_t*)handlesCl->buffer)[c] == address->handle)
{
//found
*(uint32_t*)cl->nextFreeByte = c; //store offset within handles in cl
cl->nextFreeByte += 4;
return;
}
}
//else write handle to handles cl
*(uint32_t*)handlesCl->nextFreeByte = address->handle;
handlesCl->nextFreeByte += 4;
*(uint32_t*)cl->nextFreeByte = c; //store offset within handles in cl
//store offset within handles in cl
*(uint32_t*)cl->nextFreeByte = clGetHandleIndex(handlesCl, address->handle);
cl->nextFreeByte += 4;
}

View File

@ -27,177 +27,7 @@
#define max(a, b) (a > b ? a : b)
#endif
VkPhysicalDeviceLimits _limits =
{
//TODO these values might change
.maxImageDimension1D = 16384,
.maxImageDimension2D = 16384,
.maxImageDimension3D = 2084,
.maxImageDimensionCube = 16384,
.maxImageArrayLayers = 2048,
.maxTexelBufferElements = 134217728,
.maxUniformBufferRange = 65536,
.maxStorageBufferRange = 4294967295,
.maxPushConstantsSize = 256,
.maxMemoryAllocationCount = 4096,
.maxSamplerAllocationCount = 4000,
.bufferImageGranularity = 0x400, //TODO 1KB?
.sparseAddressSpaceSize = 0xffffffff, //32 bits
.maxBoundDescriptorSets = 8,
.maxPerStageDescriptorSamplers = 4000,
.maxPerStageDescriptorUniformBuffers = 12,
.maxPerStageDescriptorStorageBuffers = 4096,
.maxPerStageDescriptorSampledImages = 16384,
.maxPerStageDescriptorStorageImages = 16384,
.maxPerStageDescriptorInputAttachments = 8, //TODO
.maxPerStageResources = 53268,
.maxDescriptorSetSamplers = 4000,
.maxDescriptorSetUniformBuffers = 72, //TODO
.maxDescriptorSetUniformBuffersDynamic = 72,
.maxDescriptorSetStorageBuffers = 4096,
.maxDescriptorSetStorageBuffersDynamic = 16,
.maxDescriptorSetSampledImages = 98304,
.maxDescriptorSetStorageImages = 98304,
.maxDescriptorSetInputAttachments = 8, //TODO
.maxVertexInputAttributes = 32,
.maxVertexInputBindings = 32,
.maxVertexInputAttributeOffset = 2047,
.maxVertexInputBindingStride = 2048,
.maxVertexOutputComponents = 128,
.maxTessellationGenerationLevel = 0, //No tessellation
.maxTessellationPatchSize = 0,
.maxTessellationControlPerVertexInputComponents = 0,
.maxTessellationControlPerVertexOutputComponents = 0,
.maxTessellationControlPerPatchOutputComponents = 0,
.maxTessellationControlTotalOutputComponents = 0,
.maxTessellationEvaluationInputComponents = 0,
.maxTessellationEvaluationOutputComponents = 0,
.maxGeometryShaderInvocations = 0, //TODO no geometry shaders for now
.maxGeometryInputComponents = 0,
.maxGeometryOutputComponents = 0,
.maxGeometryOutputVertices = 0,
.maxGeometryTotalOutputComponents = 0,
.maxFragmentInputComponents = 128,
.maxFragmentOutputAttachments = 8,
.maxFragmentDualSrcAttachments = 1,
.maxFragmentCombinedOutputResources = 16,
.maxComputeSharedMemorySize = 0, //TODO no compute for now
.maxComputeWorkGroupCount = {0,0,0},
.maxComputeWorkGroupInvocations = 0,
.maxComputeWorkGroupSize = {0,0,0},
.subPixelPrecisionBits = 8,
.subTexelPrecisionBits = 8,
.mipmapPrecisionBits = 8,
.maxDrawIndexedIndexValue = 4294967295,
.maxDrawIndirectCount = 4294967295,
.maxSamplerLodBias = 15,
.maxSamplerAnisotropy = 16.0,
.maxViewports = 16,
.maxViewportDimensions = {16384,16384},
.viewportBoundsRange = {-32768,32768},
.viewportSubPixelBits = 8,
.minMemoryMapAlignment = 0x40, //TODO
.minTexelBufferOffsetAlignment = 0x10,
.minUniformBufferOffsetAlignment = 0x100,
.minStorageBufferOffsetAlignment = 0x20,
.minTexelOffset = -8,
.maxTexelOffset = 7,
.minTexelGatherOffset = -32,
.maxTexelGatherOffset = 31,
.minInterpolationOffset = -0.5,
.maxInterpolationOffset = 0.4375,
.subPixelInterpolationOffsetBits = 4,
.maxFramebufferWidth = 16384,
.maxFramebufferHeight = 16384,
.maxFramebufferLayers = 2048,
.framebufferColorSampleCounts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT,
.framebufferDepthSampleCounts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT,
.framebufferStencilSampleCounts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT,
.framebufferNoAttachmentsSampleCounts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT,
.maxColorAttachments = 8,
.sampledImageColorSampleCounts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT,
.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT,
.sampledImageDepthSampleCounts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT,
.sampledImageStencilSampleCounts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = 1,
.timestampPeriod = 1,
.maxClipDistances = 8,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
.discreteQueuePriorities = 1,
.pointSizeRange = {1, 189.875},
.lineWidthRange = {0.5, 10},
.pointSizeGranularity = 0.125,
.lineWidthGranularity = 0.125,
.strictLines = 0, //TODO
.standardSampleLocations = 1,
.optimalBufferCopyOffsetAlignment = 0x1,
.optimalBufferCopyRowPitchAlignment = 0x1,
.nonCoherentAtomSize = 0x40
};
VkPhysicalDeviceFeatures _features =
{
//TODO this might change
.robustBufferAccess = 1,
.fullDrawIndexUint32 = 1, //TODO
.imageCubeArray = 1, //TODO
.independentBlend = 1,
.geometryShader = 0,
.tessellationShader = 0,
.sampleRateShading = 1, //TODO
.dualSrcBlend = 1,
.logicOp = 1,
.multiDrawIndirect = 1,
.drawIndirectFirstInstance = 1,
.depthClamp = 1,
.depthBiasClamp = 1,
.fillModeNonSolid = 1,
.depthBounds = 1,
.wideLines = 1,
.largePoints = 1,
.alphaToOne = 1,
.multiViewport = 1,
.samplerAnisotropy = 1,
.textureCompressionETC2 = 0,
.textureCompressionASTC_LDR = 0,
.textureCompressionBC = 0,
.occlusionQueryPrecise = 1,
.pipelineStatisticsQuery = 1,
.vertexPipelineStoresAndAtomics = 1,
.fragmentStoresAndAtomics = 1,
.shaderTessellationAndGeometryPointSize = 0,
.shaderImageGatherExtended = 1,
.shaderStorageImageExtendedFormats = 1,
.shaderStorageImageMultisample = 1,
.shaderStorageImageReadWithoutFormat = 0,
.shaderStorageImageWriteWithoutFormat = 0,
.shaderUniformBufferArrayDynamicIndexing = 1,
.shaderSampledImageArrayDynamicIndexing = 1,
.shaderStorageBufferArrayDynamicIndexing = 1,
.shaderStorageImageArrayDynamicIndexing = 1,
.shaderClipDistance = 1,
.shaderCullDistance = 1,
.shaderFloat64 = 0,
.shaderInt64 = 0,
.shaderInt16 = 0,
.shaderResourceResidency = 1,
.shaderResourceMinLod = 1,
.sparseBinding = 1,
.sparseResidencyBuffer = 1,
.sparseResidencyImage2D = 1,
.sparseResidencyImage3D = 1,
.sparseResidency2Samples = 1,
.sparseResidency4Samples = 1,
.sparseResidency8Samples = 1,
.sparseResidency16Samples = 0,
.sparseResidencyAliased = 1,
.variableMultisampleRate = 1,
.inheritedQueries = 1,
};
#define numFeatures (sizeof(_features)/sizeof(VkBool32))
#include "vkCaps.h"
typedef struct VkPhysicalDevice_T
{
@ -210,6 +40,12 @@ typedef struct VkQueue_T
int dummy;
} _queue;
//Driver side representation of a VkCommandPool: it only bundles the two
//allocators that command buffers allocated from this pool draw from.
typedef struct VkCommandPool_T
{
//fixed size allocator, created with sizeof(_commandBuffer) sized elements;
//the command buffer objects themselves come from here
PoolAllocator pa;
//block allocator, created with ARM_PAGE_SIZE sized blocks;
//the buffers handed to clInit for the control lists come from here
ConsecutivePoolAllocator cpa;
} _commandPool;
typedef enum commandBufferState
{
CMDBUF_STATE_INITIAL = 0,
@ -232,88 +68,9 @@ typedef struct VkCommandBuffer_T
ControlList handlesCl;
commandBufferState state;
VkCommandBufferUsageFlags usageFlags;
_commandPool* cp;
} _commandBuffer;
typedef struct VkCommandPool_T
{
int usePoolAllocator;
PoolAllocator pa;
LinearAllocator la;
} _commandPool;
VkQueueFamilyProperties _queueFamilyProperties[] =
{
{
.queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
.queueCount = 1,
.timestampValidBits = 64, //TODO
.minImageTransferGranularity = {1, 1, 1}
}
};
#define numQueueFamilies (sizeof(_queueFamilyProperties)/sizeof(VkQueueFamilyProperties))
static VkExtensionProperties instanceExtensions[] =
{
{
.extensionName = "VK_KHR_surface",
.specVersion = 25
},
{
.extensionName = "VK_KHR_display",
.specVersion = 21
},
{
.extensionName = "VK_EXT_direct_mode_display",
.specVersion = 1
},
{
.extensionName = "VK_EXT_debug_report",
.specVersion = 9
},
{
.extensionName = "VK_EXT_debug_utils",
.specVersion = 1
},
{
.extensionName = "VK_KHR_rpi_surface",
.specVersion = 1
}
};
#define numInstanceExtensions (sizeof(instanceExtensions) / sizeof(VkExtensionProperties))
static VkExtensionProperties deviceExtensions[] =
{
{
.extensionName = "VK_KHR_display_swapchain",
.specVersion = 9
},
{
.extensionName = "VK_KHR_maintenance1",
.specVersion = 2
},
{
.extensionName = "VK_KHR_maintenance2",
.specVersion = 1
},
{
.extensionName = "VK_KHR_maintenance3",
.specVersion = 1
},
{
.extensionName = "VK_KHR_swapchain",
.specVersion = 70
},
{
.extensionName = "VK_EXT_debug_marker",
.specVersion = 4
},
{
.extensionName = "VK_EXT_display_control",
.specVersion = 1
}
};
#define numDeviceExtensions (sizeof(deviceExtensions) / sizeof(VkExtensionProperties))
typedef struct VkInstance_T
{
//supposedly this should contain all the enabled layers?
@ -1070,27 +827,24 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool(
//initial number of command buffers to hold
int numCommandBufs = 100;
int controlListSize = ARM_PAGE_SIZE * 100;
if(pCreateInfo->flags & VK_COMMAND_POOL_CREATE_TRANSIENT_BIT)
//if(pCreateInfo->flags & VK_COMMAND_POOL_CREATE_TRANSIENT_BIT)
{
//use pool allocator
cp->usePoolAllocator = 1;
void* cpmem = malloc(numCommandBufs * sizeof(_commandBuffer));
if(!cpmem)
void* pamem = malloc(numCommandBufs * sizeof(_commandBuffer));
if(!pamem)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
cp->pa = createPoolAllocator(cpmem, sizeof(_commandBuffer), numCommandBufs * sizeof(_commandBuffer));
}
else
{
cp->usePoolAllocator = 0;
void* cpmem = malloc(numCommandBufs * sizeof(_commandBuffer));
if(!cpmem)
cp->pa = createPoolAllocator(pamem, sizeof(_commandBuffer), numCommandBufs * sizeof(_commandBuffer));
void* cpamem = malloc(controlListSize);
if(!cpamem)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
cp->la = createLinearAllocator(cpmem, numCommandBufs * sizeof(_commandBuffer));
cp->cpa = createConsecutivePoolAllocator(cpamem, ARM_PAGE_SIZE, controlListSize);
}
*pCommandPool = (VkCommandPool)cp;
@ -1116,34 +870,48 @@ VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers(
_commandPool* cp = (_commandPool*)pAllocateInfo->commandPool;
if(cp->usePoolAllocator)
//if(cp->usePoolAllocator)
{
for(int c = 0; c < pAllocateInfo->commandBufferCount; ++c)
{
pCommandBuffers[c] = poolAllocate(&cp->pa);
pCommandBuffers[c]->shaderRecCount = 0;
pCommandBuffers[c]->usageFlags = 0;
pCommandBuffers[c]->state = CMDBUF_STATE_INITIAL;
if(!pCommandBuffers[c])
{
res = VK_ERROR_OUT_OF_HOST_MEMORY; //TODO or VK_ERROR_OUT_OF_DEVICE_MEMORY?
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
}
}
else
{
for(int c = 0; c < pAllocateInfo->commandBufferCount; ++c)
{
pCommandBuffers[c] = linearAllocte(&cp->la, sizeof(_commandBuffer));
pCommandBuffers[c]->shaderRecCount = 0;
pCommandBuffers[c]->usageFlags = 0;
pCommandBuffers[c]->state = CMDBUF_STATE_INITIAL;
pCommandBuffers[c]->cp = cp;
clInit(&pCommandBuffers[c]->binCl, consecutivePoolAllocate(&cp->cpa, 1));
clInit(&pCommandBuffers[c]->handlesCl, consecutivePoolAllocate(&cp->cpa, 1));
clInit(&pCommandBuffers[c]->shaderRecCl, consecutivePoolAllocate(&cp->cpa, 1));
clInit(&pCommandBuffers[c]->uniformsCl, consecutivePoolAllocate(&cp->cpa, 1));
if(!pCommandBuffers[c])
if(!pCommandBuffers[c]->binCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY; //TODO or VK_ERROR_OUT_OF_DEVICE_MEMORY?
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->handlesCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->shaderRecCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
if(!pCommandBuffers[c]->uniformsCl.buffer)
{
res = VK_ERROR_OUT_OF_HOST_MEMORY;
break;
}
}
@ -1151,22 +919,18 @@ VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers(
if(res != VK_SUCCESS)
{
if(cp->usePoolAllocator)
//if(cp->usePoolAllocator)
{
for(int c = 0; c < pAllocateInfo->commandBufferCount; ++c)
{
consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->binCl, pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->handlesCl, pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->shaderRecCl, pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->uniformsCl, pCommandBuffers[c]->binCl.numBlocks);
poolFree(&cp->pa, pCommandBuffers[c]);
pCommandBuffers[c] = 0;
}
}
else
{
for(int c = 0; c < pAllocateInfo->commandBufferCount; ++c)
{
//we don't really free linear memory, just reset the whole command pool
pCommandBuffers[c] = 0;
}
}
}
return res;
@ -1196,10 +960,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer(
commandBuffer->usageFlags = pBeginInfo->flags;
commandBuffer->shaderRecCount = 0;
commandBuffer->state = CMDBUF_STATE_RECORDING;
clInit(&commandBuffer->binCl);
clInit(&commandBuffer->handlesCl);
clInit(&commandBuffer->shaderRecCl);
clInit(&commandBuffer->uniformsCl);
return VK_SUCCESS;
}
@ -1273,7 +1033,14 @@ VKAPI_ATTR VkResult VKAPI_CALL vkEndCommandBuffer(
{
assert(commandBuffer);
//TODO add increment semaphore and flush
//Increment the semaphore indicating that binning is done and
//unblocking the render thread. Note that this doesn't act
//until the FLUSH completes.
//The FLUSH caps all of our bin lists with a
//VC4_PACKET_RETURN.
clHasEnoughSpace(&commandBuffer->binCl, V3D21_INCREMENT_SEMAPHORE_length);
clInsertIncrementSemaphore(&commandBuffer->binCl);
clInsertFlush(&commandBuffer->binCl);
commandBuffer->state = CMDBUF_STATE_EXECUTABLE;
@ -1478,14 +1245,14 @@ VKAPI_ATTR void VKAPI_CALL vkFreeCommandBuffers(
for(int c = 0; c < commandBufferCount; ++c)
{
if(cp->usePoolAllocator)
//if(cp->usePoolAllocator)
{
consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->binCl, pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->handlesCl, pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->shaderRecCl, pCommandBuffers[c]->binCl.numBlocks);
consecutivePoolFree(&cp->cpa, &pCommandBuffers[c]->uniformsCl, pCommandBuffers[c]->binCl.numBlocks);
poolFree(&cp->pa, pCommandBuffers[c]);
}
else
{
linearFree(&cp->la, pCommandBuffers[c]);
}
}
}
@ -1508,15 +1275,12 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyCommandPool(
_commandPool* cp = (_commandPool*)commandPool;
if(cp->usePoolAllocator)
//if(cp->usePoolAllocator)
{
free(cp->pa.buf);
free(cp->cpa.buf);
destroyPoolAllocator(&cp->pa);
}
else
{
free(cp->la.buf);
destroyLinearAllocator(&cp->la);
destroyConsecutivePoolAllocator(&cp->cpa);
}
free(cp);

248
driver/vkCaps.h Normal file
View File

@ -0,0 +1,248 @@
#pragma once
#include <vulkan/vulkan.h>
//All sample count limits below advertise the same 1x/2x/4x/8x mask, so it is
//spelled out once here; the helper is removed again right after the table.
#define RPI_VK_SAMPLE_COUNTS_UP_TO_8 \
	(VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT)

//Physical device limits reported by the driver.
//NOTE(review): this is a non-static definition living in a header, so
//including vkCaps.h from more than one .c file would define the symbol
//twice -- confirm that only a single translation unit includes it.
VkPhysicalDeviceLimits _limits =
{
	//TODO these values might change

	//--- images / buffers ---
	.maxImageDimension1D = 16384,
	.maxImageDimension2D = 16384,
	.maxImageDimension3D = 2084, //NOTE(review): looks like it could be a typo for 2048 -- confirm
	.maxImageDimensionCube = 16384,
	.maxImageArrayLayers = 2048,
	.maxTexelBufferElements = 134217728,
	.maxUniformBufferRange = 65536,
	.maxStorageBufferRange = 0xffffffff,
	.maxPushConstantsSize = 256,
	.maxMemoryAllocationCount = 4096,
	.maxSamplerAllocationCount = 4000,
	.bufferImageGranularity = 0x400, //TODO 1KB?
	.sparseAddressSpaceSize = 0xffffffff, //32 bits

	//--- descriptors, per stage ---
	.maxBoundDescriptorSets = 8,
	.maxPerStageDescriptorSamplers = 4000,
	.maxPerStageDescriptorUniformBuffers = 12,
	.maxPerStageDescriptorStorageBuffers = 4096,
	.maxPerStageDescriptorSampledImages = 16384,
	.maxPerStageDescriptorStorageImages = 16384,
	.maxPerStageDescriptorInputAttachments = 8, //TODO
	.maxPerStageResources = 53268,

	//--- descriptors, per set ---
	.maxDescriptorSetSamplers = 4000,
	.maxDescriptorSetUniformBuffers = 72, //TODO
	.maxDescriptorSetUniformBuffersDynamic = 72,
	.maxDescriptorSetStorageBuffers = 4096,
	.maxDescriptorSetStorageBuffersDynamic = 16,
	.maxDescriptorSetSampledImages = 98304,
	.maxDescriptorSetStorageImages = 98304,
	.maxDescriptorSetInputAttachments = 8, //TODO

	//--- vertex stage ---
	.maxVertexInputAttributes = 32,
	.maxVertexInputBindings = 32,
	.maxVertexInputAttributeOffset = 2047,
	.maxVertexInputBindingStride = 2048,
	.maxVertexOutputComponents = 128,

	//--- tessellation ---
	.maxTessellationGenerationLevel = 0, //No tessellation
	.maxTessellationPatchSize = 0,
	.maxTessellationControlPerVertexInputComponents = 0,
	.maxTessellationControlPerVertexOutputComponents = 0,
	.maxTessellationControlPerPatchOutputComponents = 0,
	.maxTessellationControlTotalOutputComponents = 0,
	.maxTessellationEvaluationInputComponents = 0,
	.maxTessellationEvaluationOutputComponents = 0,

	//--- geometry stage ---
	.maxGeometryShaderInvocations = 0, //TODO no geometry shaders for now
	.maxGeometryInputComponents = 0,
	.maxGeometryOutputComponents = 0,
	.maxGeometryOutputVertices = 0,
	.maxGeometryTotalOutputComponents = 0,

	//--- fragment stage ---
	.maxFragmentInputComponents = 128,
	.maxFragmentOutputAttachments = 8,
	.maxFragmentDualSrcAttachments = 1,
	.maxFragmentCombinedOutputResources = 16,

	//--- compute ---
	.maxComputeSharedMemorySize = 0, //TODO no compute for now
	.maxComputeWorkGroupCount = { 0, 0, 0 },
	.maxComputeWorkGroupInvocations = 0,
	.maxComputeWorkGroupSize = { 0, 0, 0 },

	//--- precision / draw ---
	.subPixelPrecisionBits = 8,
	.subTexelPrecisionBits = 8,
	.mipmapPrecisionBits = 8,
	.maxDrawIndexedIndexValue = 0xffffffff,
	.maxDrawIndirectCount = 0xffffffff,
	.maxSamplerLodBias = 15.0f,
	.maxSamplerAnisotropy = 16.0f,

	//--- viewports ---
	.maxViewports = 16,
	.maxViewportDimensions = { 16384, 16384 },
	.viewportBoundsRange = { -32768.0f, 32768.0f },
	.viewportSubPixelBits = 8,

	//--- alignments / offsets ---
	.minMemoryMapAlignment = 0x40, //TODO
	.minTexelBufferOffsetAlignment = 0x10,
	.minUniformBufferOffsetAlignment = 0x100,
	.minStorageBufferOffsetAlignment = 0x20,
	.minTexelOffset = -8,
	.maxTexelOffset = 7,
	.minTexelGatherOffset = -32,
	.maxTexelGatherOffset = 31,
	.minInterpolationOffset = -0.5f,
	.maxInterpolationOffset = 0.4375f,
	.subPixelInterpolationOffsetBits = 4,

	//--- framebuffers / sample counts ---
	.maxFramebufferWidth = 16384,
	.maxFramebufferHeight = 16384,
	.maxFramebufferLayers = 2048,
	.framebufferColorSampleCounts = RPI_VK_SAMPLE_COUNTS_UP_TO_8,
	.framebufferDepthSampleCounts = RPI_VK_SAMPLE_COUNTS_UP_TO_8,
	.framebufferStencilSampleCounts = RPI_VK_SAMPLE_COUNTS_UP_TO_8,
	.framebufferNoAttachmentsSampleCounts = RPI_VK_SAMPLE_COUNTS_UP_TO_8,
	.maxColorAttachments = 8,
	.sampledImageColorSampleCounts = RPI_VK_SAMPLE_COUNTS_UP_TO_8,
	.sampledImageIntegerSampleCounts = RPI_VK_SAMPLE_COUNTS_UP_TO_8,
	.sampledImageDepthSampleCounts = RPI_VK_SAMPLE_COUNTS_UP_TO_8,
	.sampledImageStencilSampleCounts = RPI_VK_SAMPLE_COUNTS_UP_TO_8,
	.storageImageSampleCounts = RPI_VK_SAMPLE_COUNTS_UP_TO_8,
	.maxSampleMaskWords = 1,

	//--- timestamps ---
	.timestampComputeAndGraphics = VK_TRUE,
	.timestampPeriod = 1.0f,

	//--- clipping / queues ---
	.maxClipDistances = 8,
	.maxCullDistances = 8,
	.maxCombinedClipAndCullDistances = 8,
	.discreteQueuePriorities = 1,

	//--- points / lines ---
	.pointSizeRange = { 1.0f, 189.875f },
	.lineWidthRange = { 0.5f, 10.0f },
	.pointSizeGranularity = 0.125f,
	.lineWidthGranularity = 0.125f,
	.strictLines = VK_FALSE, //TODO
	.standardSampleLocations = VK_TRUE,

	//--- copies / coherency ---
	.optimalBufferCopyOffsetAlignment = 0x1,
	.optimalBufferCopyRowPitchAlignment = 0x1,
	.nonCoherentAtomSize = 0x40
};

#undef RPI_VK_SAMPLE_COUNTS_UP_TO_8
VkPhysicalDeviceFeatures _features =
{
//TODO this might change
.robustBufferAccess = 1,
.fullDrawIndexUint32 = 1, //TODO
.imageCubeArray = 1, //TODO
.independentBlend = 1,
.geometryShader = 0,
.tessellationShader = 0,
.sampleRateShading = 1, //TODO
.dualSrcBlend = 1,
.logicOp = 1,
.multiDrawIndirect = 1,
.drawIndirectFirstInstance = 1,
.depthClamp = 1,
.depthBiasClamp = 1,
.fillModeNonSolid = 1,
.depthBounds = 1,
.wideLines = 1,
.largePoints = 1,
.alphaToOne = 1,
.multiViewport = 1,
.samplerAnisotropy = 1,
.textureCompressionETC2 = 0,
.textureCompressionASTC_LDR = 0,
.textureCompressionBC = 0,
.occlusionQueryPrecise = 1,
.pipelineStatisticsQuery = 1,
.vertexPipelineStoresAndAtomics = 1,
.fragmentStoresAndAtomics = 1,
.shaderTessellationAndGeometryPointSize = 0,
.shaderImageGatherExtended = 1,
.shaderStorageImageExtendedFormats = 1,
.shaderStorageImageMultisample = 1,
.shaderStorageImageReadWithoutFormat = 0,
.shaderStorageImageWriteWithoutFormat = 0,
.shaderUniformBufferArrayDynamicIndexing = 1,
.shaderSampledImageArrayDynamicIndexing = 1,
.shaderStorageBufferArrayDynamicIndexing = 1,
.shaderStorageImageArrayDynamicIndexing = 1,
.shaderClipDistance = 1,
.shaderCullDistance = 1,
.shaderFloat64 = 0,
.shaderInt64 = 0,
.shaderInt16 = 0,
.shaderResourceResidency = 1,
.shaderResourceMinLod = 1,
.sparseBinding = 1,
.sparseResidencyBuffer = 1,
.sparseResidencyImage2D = 1,
.sparseResidencyImage3D = 1,
.sparseResidency2Samples = 1,
.sparseResidency4Samples = 1,
.sparseResidency8Samples = 1,
.sparseResidency16Samples = 0,
.sparseResidencyAliased = 1,
.variableMultisampleRate = 1,
.inheritedQueries = 1,
};
#define numFeatures (sizeof(_features)/sizeof(VkBool32))
//The queue families exposed by the driver: a single family with one queue.
//NOTE(review): VK_QUEUE_COMPUTE_BIT is advertised here although the compute
//limits in _limits are all zero ("no compute for now") -- confirm intent.
VkQueueFamilyProperties _queueFamilyProperties[] =
{
	[0] =
	{
		.queueFlags =
			VK_QUEUE_GRAPHICS_BIT |
			VK_QUEUE_COMPUTE_BIT |
			VK_QUEUE_TRANSFER_BIT |
			VK_QUEUE_SPARSE_BINDING_BIT,
		.queueCount = 1,
		.timestampValidBits = 64, //TODO
		.minImageTransferGranularity = { .width = 1, .height = 1, .depth = 1 }
	}
};

//element count of _queueFamilyProperties
#define numQueueFamilies (sizeof(_queueFamilyProperties)/sizeof(VkQueueFamilyProperties))
//Instance level extensions known to the driver, one { name, revision } pair per row.
static VkExtensionProperties instanceExtensions[] =
{
	{ .extensionName = "VK_KHR_surface",             .specVersion = 25 },
	{ .extensionName = "VK_KHR_display",             .specVersion = 21 },
	{ .extensionName = "VK_EXT_direct_mode_display", .specVersion = 1 },
	{ .extensionName = "VK_EXT_debug_report",        .specVersion = 9 },
	{ .extensionName = "VK_EXT_debug_utils",         .specVersion = 1 },
	{ .extensionName = "VK_KHR_rpi_surface",         .specVersion = 1 }
};

//element count of instanceExtensions
#define numInstanceExtensions (sizeof(instanceExtensions) / sizeof(VkExtensionProperties))
static VkExtensionProperties deviceExtensions[] =
{
{
.extensionName = "VK_KHR_display_swapchain",
.specVersion = 9
},
{
.extensionName = "VK_KHR_maintenance1",
.specVersion = 2
},
{
.extensionName = "VK_KHR_maintenance2",
.specVersion = 1
},
{
.extensionName = "VK_KHR_maintenance3",
.specVersion = 1
},
{
.extensionName = "VK_KHR_swapchain",
.specVersion = 70
},
{
.extensionName = "VK_EXT_debug_marker",
.specVersion = 4
},
{
.extensionName = "VK_EXT_display_control",
.specVersion = 1
}
};
#define numDeviceExtensions (sizeof(deviceExtensions) / sizeof(VkExtensionProperties))