diff --git a/driver/common.h b/driver/common.h
index 88d5399..85807d5 100644
--- a/driver/common.h
+++ b/driver/common.h
@@ -487,6 +487,28 @@ typedef struct VkDescriptorPool_T
 	uint32_t freeAble;
 } _descriptorPool;
 
+/*
+ * One performance query: the counters selected for it, the values reported
+ * for them and the IDs of the perfmons created for them.
+ */
+typedef struct VkQuery_T
+{
+	uint32_t enabledCounters[VC4_PERFCNT_NUM_EVENTS];
+	//indexed per enabled counter, so it has to be as large as enabledCounters
+	uint64_t counterValues[VC4_PERFCNT_NUM_EVENTS];
+	uint32_t numEnabledCounters;
+	//one perfmon per group of up to DRM_VC4_MAX_PERF_COUNTERS counters
+	uint32_t perfmonIDs[2];
+} _query;
+
+//A pool of queryCount queries of the same type
+typedef struct VkQueryPool_T
+{
+	VkQueryType type;
+	uint32_t queryCount;
+	_query* queryPool;
+} _queryPool;
+
 uint32_t getFormatBpp(VkFormat f);
 uint32_t packVec4IntoABGR8(const float rgba[4]);
 void createImageBO(_image* i);
diff --git a/driver/device.c b/driver/device.c
index 88d7291..c07a6cf 100644
--- a/driver/device.c
+++ b/driver/device.c
@@ -159,6 +159,76 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkGetPhysicalDeviceQueueFamilyProperties(
 	*pQueueFamilyPropertyCount = elementsWritten;
 }
 
+/*
+ * Lists the performance counters of a queue family.
+ * If both output arrays are NULL only the number of counters is stored in pCounterCount.
+ * Otherwise at most *pCounterCount entries are written to each array that is not NULL,
+ * pCounterCount receives the number written and VK_INCOMPLETE signals a too small array.
+ */
+VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
+	VkPhysicalDevice physicalDevice,
+	uint32_t queueFamilyIndex,
+	uint32_t* pCounterCount,
+	VkPerformanceCounterKHR* pCounters,
+	VkPerformanceCounterDescriptionKHR* pCounterDescriptions)
+{
+	assert(physicalDevice);
+	assert(pCounterCount);
+
+	const uint32_t available = (uint32_t)numPerformanceCounterTypes;
+
+	if(!pCounters && !pCounterDescriptions)
+	{
+		*pCounterCount = available;
+		return VK_SUCCESS;
+	}
+
+	const uint32_t arraySize = *pCounterCount;
+	const uint32_t elementsWritten = arraySize < available ? arraySize : available;
+
+	for(uint32_t c = 0; c < elementsWritten; ++c)
+	{
+		//either array may be NULL on its own
+		//the tables carry no sType/pNext, so restore them after copying the payload
+		if(pCounters)
+		{
+			void* next = (void*)pCounters[c].pNext;
+			pCounters[c] = performanceCounterTypes[c];
+			pCounters[c].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_KHR;
+			pCounters[c].pNext = next;
+		}
+
+		if(pCounterDescriptions)
+		{
+			void* next = (void*)pCounterDescriptions[c].pNext;
+			pCounterDescriptions[c] = performanceCounterDescriptions[c];
+			pCounterDescriptions[c].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR;
+			pCounterDescriptions[c].pNext = next;
+		}
+	}
+
+	*pCounterCount = elementsWritten;
+
+	return arraySize < available ? VK_INCOMPLETE : VK_SUCCESS;
+}
+
+/*
+ * Reports how many passes are needed to collect the counters of a performance query pool.
+ * A single pass covers up to DRM_VC4_MAX_PERF_COUNTERS counters.
+ */
+VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
+	VkPhysicalDevice physicalDevice,
+	const VkQueryPoolPerformanceCreateInfoKHR* pPerformanceQueryCreateInfo,
+	uint32_t* pNumPasses)
+{
+	assert(physicalDevice);
+	assert(pPerformanceQueryCreateInfo);
+	assert(pNumPasses);
+
+	//round up: a partially filled group of counters still needs a pass of its own
+	*pNumPasses = (pPerformanceQueryCreateInfo->counterIndexCount + DRM_VC4_MAX_PERF_COUNTERS - 1) / DRM_VC4_MAX_PERF_COUNTERS;
+}
+
 /*
  * https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/vkspec.html#vkCreateDevice
  * vkCreateDevice verifies that extensions and features requested in the ppEnabledExtensionNames and pEnabledFeatures
diff --git a/driver/query.c b/driver/query.c
index 6df499e..5ac58a2 100644
--- a/driver/query.c
+++ b/driver/query.c
@@ -1,17 +1,87 @@
 #include "common.h"
 
+//TODO VkPerformanceQuerySubmitInfoKHR
+
+//Stub: reports success without taking any lock yet
+VKAPI_ATTR VkResult VKAPI_CALL vkAcquireProfilingLockKHR(
+	VkDevice device,
+	const VkAcquireProfilingLockInfoKHR* pInfo)
+{
+	//TODO
+	return VK_SUCCESS;
+}
+
+//Stub: does nothing yet
+VKAPI_ATTR void VKAPI_CALL vkReleaseProfilingLockKHR(
+	VkDevice device)
+{
+	//TODO
+}
+
+/*
+ * Creates a query pool. Only VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR pools are implemented:
+ * every query gets the counters listed in the chained VkQueryPoolPerformanceCreateInfoKHR
+ * and one perfmon per group of up to DRM_VC4_MAX_PERF_COUNTERS counters.
+ * Returns VK_ERROR_OUT_OF_HOST_MEMORY if an allocation fails.
+ */
 VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateQueryPool(
 	VkDevice device,
 	const VkQueryPoolCreateInfo* pCreateInfo,
 	const VkAllocationCallbacks* pAllocator,
 	VkQueryPool* pQueryPool)
 {
-	//TODO
+	assert(device);
+	assert(pCreateInfo);
+	assert(pQueryPool);
 
-	if(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION)
+	if(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION ||
+		pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
+		pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP)
 	{
 		UNSUPPORTED(VK_QUERY_TYPE_OCCLUSION);
 	}
+	else if(pCreateInfo->queryType == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR)
+	{
+		//TODO walk the pNext chain instead of assuming its first entry
+		const VkQueryPoolPerformanceCreateInfoKHR* ci = pCreateInfo->pNext;
+		assert(ci);
+		assert(ci->counterIndexCount <= VC4_PERFCNT_NUM_EVENTS);
+
+		_queryPool* qp = ALLOCATE(sizeof(_queryPool), 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+		if(!qp)
+		{
+			return VK_ERROR_OUT_OF_HOST_MEMORY;
+		}
+
+		qp->queryCount = pCreateInfo->queryCount;
+		qp->type = pCreateInfo->queryType;
+		qp->queryPool = ALLOCATE(sizeof(_query) * qp->queryCount, 1, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+		if(!qp->queryPool)
+		{
+			FREE(qp);
+			return VK_ERROR_OUT_OF_HOST_MEMORY;
+		}
+
+		//start from zeroed counter values and perfmon IDs
+		memset(qp->queryPool, 0, sizeof(_query) * qp->queryCount);
+
+		for(uint32_t c = 0; c < qp->queryCount; ++c)
+		{
+			_query* q = &qp->queryPool[c];
+			q->numEnabledCounters = ci->counterIndexCount;
+			memcpy(q->enabledCounters, ci->pCounterIndices, sizeof(uint32_t) * ci->counterIndexCount);
+
+			//one perfmon per group of up to DRM_VC4_MAX_PERF_COUNTERS counters
+			for(uint32_t d = 0; d < q->numEnabledCounters; d += DRM_VC4_MAX_PERF_COUNTERS)
+			{
+				uint32_t remaining = q->numEnabledCounters - d;
+				uint32_t groupSize = remaining > DRM_VC4_MAX_PERF_COUNTERS ? DRM_VC4_MAX_PERF_COUNTERS : remaining;
+				q->perfmonIDs[d / DRM_VC4_MAX_PERF_COUNTERS] = vc4_create_perfmon(controlFd, &q->enabledCounters[d], groupSize);
+			}
+		}
+
+		*pQueryPool = qp;
+	}
 
 	return VK_SUCCESS;
 }
@@ -30,7 +100,30 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkDestroyQueryPool(
 	VkQueryPool queryPool,
 	const VkAllocationCallbacks* pAllocator)
 {
-	//TODO
+	//Destroys the perfmons of every query, then frees the query array and the pool
+	assert(device);
+
+	_queryPool* qp = queryPool;
+
+	//destroying VK_NULL_HANDLE is valid and does nothing
+	if(!qp)
+	{
+		return;
+	}
+
+	for(uint32_t c = 0; c < qp->queryCount; ++c)
+	{
+		_query* q = &qp->queryPool[c];
+
+		//mirrors creation: one perfmon per group of up to DRM_VC4_MAX_PERF_COUNTERS counters
+		for(uint32_t d = 0; d < q->numEnabledCounters; d += DRM_VC4_MAX_PERF_COUNTERS)
+		{
+			vc4_destroy_perfmon(controlFd, q->perfmonIDs[d / DRM_VC4_MAX_PERF_COUNTERS]);
+		}
+	}
+
+	FREE(qp->queryPool);
+	FREE(qp);
 }
 
 VKAPI_ATTR void VKAPI_CALL rpi_vkCmdEndQuery(
@@ -38,6 +131,9 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdEndQuery(
 	VkQueryPool queryPool,
 	uint32_t query)
 {
+	assert(commandBuffer);
+	assert(queryPool);
+
 	//TODO
 }
 
@@ -47,6 +143,9 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBeginQuery(
 	uint32_t query,
 	VkQueryControlFlags flags)
 {
+	assert(commandBuffer);
+	assert(queryPool);
+
 	//TODO
 }
 
@@ -73,7 +172,38 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetQueryPoolResults(
 	VkDeviceSize stride,
 	VkQueryResultFlags flags)
 {
-	//TODO
+	//Copies the stored counter values of queryCount queries, starting at firstQuery, into pData
+	assert(device);
+	assert(queryPool);
+	assert(pData);
+
+	//TODO flags
+
+	_queryPool* qp = queryPool;
+
+	assert(firstQuery + queryCount <= qp->queryCount);
+
+	//each query owns one stride sized slot in pData that holds
+	//an array of VkPerformanceCounterResultKHR, one per enabled counter
+	for(uint32_t c = 0; c < queryCount; ++c)
+	{
+		const _query* q = &qp->queryPool[firstQuery + c];
+		size_t offset = (size_t)(c * stride);
+
+		for(uint32_t d = 0; d < q->numEnabledCounters; ++d)
+		{
+			//never write past the end of the caller's buffer
+			if(offset + sizeof(VkPerformanceCounterResultKHR) > dataSize)
+			{
+				break;
+			}
+
+			VkPerformanceCounterResultKHR* result = (VkPerformanceCounterResultKHR*)((char*)pData + offset);
+			result->uint64 = q->counterValues[d];
+			offset += sizeof(VkPerformanceCounterResultKHR);
+		}
+	}
+
 	return VK_SUCCESS;
 }
 
diff --git a/driver/vkCaps.h b/driver/vkCaps.h
index ee34aca..3f0725a 100644
--- a/driver/vkCaps.h
+++ b/driver/vkCaps.h
@@ -264,6 +264,10 @@ static VkExtensionProperties deviceExtensions[] =
 	{
 		.extensionName = "VK_KHR_driver_properties",
 		.specVersion = 1
+	},
+	{
+		.extensionName = "VK_KHR_performance_query",
+		.specVersion = 1
 	}
 };
 #define numDeviceExtensions (sizeof(deviceExtensions) / sizeof(VkExtensionProperties))
@@ -305,4 +309,257 @@ static VkFormat supportedFormats[] =
 };
 #define numSupportedFormats (sizeof(supportedFormats)/sizeof(VkFormat))
 
+//Properties of the performance counters; entry i belongs to performanceCounterDescriptions[i]
+static VkPerformanceCounterKHR performanceCounterTypes[] =
+{ //TODO UUID
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	},
+	{
+		.unit = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
+		.scope = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR,
+		.storage = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
+	}
+};
+
+//Names of the performance counters, in the same order as performanceCounterTypes
+static VkPerformanceCounterDescriptionKHR performanceCounterDescriptions[] =
+{
+	{
+		.name = "FRONT_END_PIPELINE_VALID_PRIMS_NO_RENDER",
+	},
+	{
+		.name = "FRONT_END_PIPELINE_VALID_PRIMS_RENDER",
+	},
+	{
+		.name = "FRONT_END_PIPELINE_CLIPPED_QUADS",
+	},
+	{
+		.name = "FRONT_END_PIPELINE_VALID_QUADS",
+	},
+	{
+		.name = "TILE_BUFFER_QUADS_NOT_PASSING_STENCIL",
+	},
+	{
+		.name = "TILE_BUFFER_QUADS_NOT_PASSING_Z_AND_STENCIL",
+	},
+	{
+		.name = "TILE_BUFFER_QUADS_PASSING_Z_AND_STENCIL",
+	},
+	{
+		.name = "TILE_BUFFER_QUADS_ZERO_COVERAGE",
+	},
+	{
+		.name = "TILE_BUFFER_QUADS_NON_ZERO_COVERAGE",
+	},
+	{
+		.name = "TILE_BUFFER_QUADS_WRITTEN_TO_COLOR_BUF",
+	},
+	{
+		.name = "PLB_PRIMS_OUTSIDE_VIEWPORT",
+	},
+	{
+		.name = "PLB_PRIMS_NEED_CLIPPING",
+	},
+	{
+		.name = "PRIMITIVE_SETUP_ENGINE_PRIMS_REVERSED",
+	},
+	{
+		.name = "QUAD_PROCESSOR_UNIT_TOTAL_IDLE_CYCLES",
+	},
+	{
+		.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_VERTEX_COORD_SHADING",
+	},
+	{
+		.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_FRAGMENT_SHADING",
+	},
+	{
+		.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_EXEC_VALID_INST",
+	},
+	{
+		.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_WAITING_TMUS",
+	},
+	{
+		.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_WAITING_SCOREBOARD",
+	},
+	{
+		.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_WAITING_VARYINGS",
+	},
+	{
+		.name = "QUAD_PROCESSOR_UNIT_TOTAL_INST_CACHE_HIT",
+	},
+	{
+		.name = "QUAD_PROCESSOR_UNIT_TOTAL_INST_CACHE_MISS",
+	},
+	{
+		.name = "QUAD_PROCESSOR_UNIT_TOTAL_UNIFORM_CACHE_HIT",
+	},
+	{
+		.name = "QUAD_PROCESSOR_UNIT_TOTAL_UNIFORM_CACHE_MISS",
+	},
+	{
+		.name = "TEXTURE_MEMORY_LOOKUP_UNIT_TOTAL_TEXT_QUADS_PROCESSED",
+	},
+	{
+		.name = "TEXTURE_MEMORY_LOOKUP_UNIT_TOTAL_TEXT_CACHE_MISS",
+	},
+	{
+		.name = "VERTEX_PIPE_MEMORY_TOTAL_CLK_CYCLES_VERTEX_DMA_WRITE_STALLED",
+	},
+	{
+		.name = "VERTEX_PIPE_MEMORY_TOTAL_CLK_CYCLES_VERTEX_DMA_STALLED",
+	},
+	{
+		.name = "L2C_TOTAL_L2_CACHE_HIT",
+	},
+	{
+		.name = "L2C_TOTAL_L2_CACHE_MISS",
+	}
+};
+
+//Number of entries in performanceCounterTypes (divide by the element size, not by sizeof(uint32_t))
+#define numPerformanceCounterTypes (sizeof(performanceCounterTypes)/sizeof(VkPerformanceCounterKHR))
+
 #define VK_DRIVER_VERSION VK_MAKE_VERSION(1, 1, 0)