Performance-query fixes for the driver, plus explicit loading of the
VK_KHR_performance_query entry points in test/query.

Review changes folded into this patch:
  * driver/query.c: the inner loops advance d in steps of
    DRM_VC4_MAX_PERF_COUNTERS, so d is a counter offset
    (0, DRM_VC4_MAX_PERF_COUNTERS, ...), not a perfmon index. perfmonIDs[2]
    and counterValues[2][...] are now indexed with
    d / DRM_VC4_MAX_PERF_COUNTERS at the create, destroy and get-results
    sites; indexing them with d ran past both arrays on the second perfmon.
  * test/query/query.cpp: all four function-pointer typedefs now use
    VKAPI_PTR, matching the first typedef, instead of mixing in VKAPI_ATTR.

NOTE(review): vc4_create_perfmon() is still handed the same counter count
for every perfmon; it looks like the count should shrink by d for the last
one. Confirm against the rest of rpi_vkCreateQueryPool.
NOTE(review): the results loop still runs "c < queryCount" starting at
firstQuery; confirm whether "firstQuery + queryCount" is intended.
NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy of this patch; regenerate it against the tree
before applying.

diff --git a/driver/common.h b/driver/common.h
index 85807d5..7e0d9bf 100644
--- a/driver/common.h
+++ b/driver/common.h
@@ -490,7 +490,7 @@ typedef struct VkDescriptorPool_T
 typedef struct VkQuery_T
 {
 	uint32_t enabledCounters[VC4_PERFCNT_NUM_EVENTS];
-	uint64_t counterValues[DRM_VC4_MAX_PERF_COUNTERS];
+	uint64_t counterValues[2][DRM_VC4_MAX_PERF_COUNTERS];
 	uint32_t numEnabledCounters;
 	uint32_t perfmonIDs[2];
 } _query;
diff --git a/driver/instance.c b/driver/instance.c
index 1596336..7e6f208 100644
--- a/driver/instance.c
+++ b/driver/instance.c
@@ -441,6 +441,11 @@ VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL rpi_vkGetInstanceProcAddr(
 	RETFUNC(vkAcquireNextImageKHR);
 	RETFUNC(vkQueuePresentKHR);
 
+	RETFUNC(vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR);
+	RETFUNC(vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR);
+	RETFUNC(vkAcquireProfilingLockKHR);
+	RETFUNC(vkReleaseProfilingLockKHR);
+
 	return 0;
 }
 
diff --git a/driver/kernelInterface.c b/driver/kernelInterface.c
index 12c5607..2ed8a7c 100644
--- a/driver/kernelInterface.c
+++ b/driver/kernelInterface.c
@@ -596,7 +596,7 @@ uint32_t vc4_create_perfmon(int fd, uint32_t* counters, uint32_t num_counters)
 
 	struct drm_vc4_perfmon_create arg =
 	{
-		.ncounters = 30,
+		.ncounters = num_counters,
 	};
 
 	for(uint32_t c = 0; c < num_counters; ++c)
diff --git a/driver/query.c b/driver/query.c
index 806b943..3d9992d 100644
--- a/driver/query.c
+++ b/driver/query.c
@@ -51,6 +51,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateQueryPool(
 			for(uint32_t d = 0; d < ci.counterIndexCount; d += DRM_VC4_MAX_PERF_COUNTERS)
 			{
-				qp->queryPool[c].perfmonIDs[d] = vc4_create_perfmon(controlFd, &qp->queryPool[c].enabledCounters[d], qp->queryPool[c].numEnabledCounters > DRM_VC4_MAX_PERF_COUNTERS ? DRM_VC4_MAX_PERF_COUNTERS : qp->queryPool[c].numEnabledCounters);
+				qp->queryPool[c].perfmonIDs[d / DRM_VC4_MAX_PERF_COUNTERS] = vc4_create_perfmon(controlFd, &qp->queryPool[c].enabledCounters[d], qp->queryPool[c].numEnabledCounters > DRM_VC4_MAX_PERF_COUNTERS ? DRM_VC4_MAX_PERF_COUNTERS : qp->queryPool[c].numEnabledCounters);
+				memset(&qp->queryPool[c].counterValues[d / DRM_VC4_MAX_PERF_COUNTERS][0], 0, sizeof(uint64_t) * DRM_VC4_MAX_PERF_COUNTERS);
 			}
 		}
 
@@ -81,7 +82,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkDestroyQueryPool(
 
 		for(uint32_t c = 0; c < qp->queryCount; ++c)
 		{
-			for(uint32_t d = 0; d < qp->queryPool[c].enabledCounters; d += DRM_VC4_MAX_PERF_COUNTERS)
+			for(uint32_t d = 0; d < qp->queryPool[c].numEnabledCounters; d += DRM_VC4_MAX_PERF_COUNTERS)
 			{
-				vc4_destroy_perfmon(controlFd, qp->queryPool[c].perfmonIDs[d]);
+				vc4_destroy_perfmon(controlFd, qp->queryPool[c].perfmonIDs[d / DRM_VC4_MAX_PERF_COUNTERS]);
 			}
@@ -140,11 +141,17 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetQueryPoolResults(
 	assert(queryPool);
 
 	//TODO flags
+	//TODO return values etc.
 
 	_queryPool* qp = queryPool;
 
 	for(uint32_t c = firstQuery; c < queryCount; ++c)
 	{
+		for(uint32_t d = 0; d < qp->queryPool[c].numEnabledCounters; d += DRM_VC4_MAX_PERF_COUNTERS)
+		{
+			vc4_perfmon_get_values(controlFd, qp->queryPool[c].perfmonIDs[d / DRM_VC4_MAX_PERF_COUNTERS], &qp->queryPool[c].counterValues[d / DRM_VC4_MAX_PERF_COUNTERS][0]);
+		}
+
 		uint32_t counter = 0;
 		for(uint32_t d = 0; d < dataSize; d += stride, ++counter)
 		{
diff --git a/driver/vkCaps.h b/driver/vkCaps.h
index 3f0725a..0739cd3 100644
--- a/driver/vkCaps.h
+++ b/driver/vkCaps.h
@@ -557,6 +557,6 @@ static VkPerformanceCounterDescriptionKHR performanceCounterDescriptions[] =
 	}
 };
 
-#define numPerformanceCounterTypes (sizeof(performanceCounterTypes)/sizeof(uint32_t))
+#define numPerformanceCounterTypes (sizeof(performanceCounterTypes)/sizeof(VkPerformanceCounterKHR))
 
 #define VK_DRIVER_VERSION VK_MAKE_VERSION(1, 1, 0)
diff --git a/test/query/query.cpp b/test/query/query.cpp
index 6067830..7145515 100644
--- a/test/query/query.cpp
+++ b/test/query/query.cpp
@@ -84,6 +84,30 @@ uint32_t* enabledCounters = 0;
 uint32_t numQueryPasses = 0;
 VkQueryPool queryPool;
 
+typedef VkResult (VKAPI_PTR *PFN_vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR)(
+	VkPhysicalDevice physicalDevice,
+	uint32_t queueFamilyIndex,
+	uint32_t* pCounterCount,
+	VkPerformanceCounterKHR* pCounters,
+	VkPerformanceCounterDescriptionKHR* pCounterDescriptions);
+
+typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR)(
+	VkPhysicalDevice physicalDevice,
+	const VkQueryPoolPerformanceCreateInfoKHR* pPerformanceQueryCreateInfo,
+	uint32_t* pNumPasses);
+
+typedef VkResult (VKAPI_PTR *PFN_vkAcquireProfilingLockKHR)(
+	VkDevice device,
+	const VkAcquireProfilingLockInfoKHR* pInfo);
+
+typedef void (VKAPI_PTR *PFN_vkReleaseProfilingLockKHR)(
+	VkDevice device);
+
+PFN_vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR my_vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR;
+PFN_vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR my_vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR;
+PFN_vkAcquireProfilingLockKHR my_vkAcquireProfilingLockKHR;
+PFN_vkReleaseProfilingLockKHR my_vkReleaseProfilingLockKHR;
+
 void cleanup() {
 	vkDeviceWaitIdle(device);
 
@@ -151,6 +175,7 @@ void setupVulkan() {
 	//CreateUniformBuffer();
 	CreateShaders();
 	CreatePipeline();
+	CreateQueryPool();
 	recordCommandBuffers();
 }
 
@@ -659,7 +684,7 @@ void recordCommandBuffers()
 		lockInfo.sType = VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR;
 		lockInfo.timeout = UINT64_MAX;
 
-		vkAcquireProfilingLockKHR(device, &lockInfo);
+		my_vkAcquireProfilingLockKHR(device, &lockInfo);
 
 		// Record command buffer
 		vkBeginCommandBuffer(presentCommandBuffers[i], &beginInfo);
@@ -749,7 +774,7 @@ void draw() {
 
 	std::cout << "submitted draw command buffer" << std::endl;
 
-	vkReleaseProfilingLockKHR(device);
+	my_vkReleaseProfilingLockKHR(device);
 	{
 		//Get query results
 		VkPerformanceCounterResultKHR* recordedCounters = (VkPerformanceCounterResultKHR*)malloc(sizeof(VkPerformanceCounterResultKHR) * counterCount);
@@ -761,7 +786,7 @@ void draw() {
 			{
 			case VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR:
 			case VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR:
-				printf("%s %ull\n", counterDescriptions[enabledCounters[c]].name, recordedCounters[c].uint64);
+				printf("%s %llu\n", counterDescriptions[enabledCounters[c]].name, recordedCounters[c].uint64);
 				break;
 			}
 		}
@@ -1232,11 +1257,16 @@ void CreateVertexBuffer()
 
 void CreateQueryPool()
 {
-	vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, graphicsQueueFamily, &counterCount, 0, 0);
+	my_vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR = (PFN_vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR)vkGetInstanceProcAddr(instance, "vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR");
+	my_vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR = (PFN_vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR)vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR");
+	my_vkAcquireProfilingLockKHR = (PFN_vkAcquireProfilingLockKHR)vkGetInstanceProcAddr(instance, "vkAcquireProfilingLockKHR");
+	my_vkReleaseProfilingLockKHR = (PFN_vkReleaseProfilingLockKHR)vkGetInstanceProcAddr(instance, "vkReleaseProfilingLockKHR");
+
+	my_vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, graphicsQueueFamily, &counterCount, 0, 0);
 
 	counters = (VkPerformanceCounterKHR*)malloc(sizeof(VkPerformanceCounterKHR) * counterCount);
 	counterDescriptions = (VkPerformanceCounterDescriptionKHR*)malloc(sizeof(VkPerformanceCounterDescriptionKHR) * counterCount);
-	vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, graphicsQueueFamily, &counterCount, counters, counterDescriptions);
+	my_vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(physicalDevice, graphicsQueueFamily, &counterCount, counters, counterDescriptions);
 
 	enabledCounters = (uint32_t*)malloc(sizeof(uint32_t) * counterCount);
 	for(uint32_t c = 0; c < counterCount; ++c)
@@ -1250,7 +1280,7 @@ void CreateQueryPool()
 	performanceQueryCreateInfo.counterIndexCount = counterCount;
 	performanceQueryCreateInfo.pCounterIndices = enabledCounters;
 
-	vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(physicalDevice, &performanceQueryCreateInfo, &numQueryPasses);
+	my_vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(physicalDevice, &performanceQueryCreateInfo, &numQueryPasses);
 
 	VkQueryPoolCreateInfo queryPoolCreateInfo = {};
 	queryPoolCreateInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;