mirror of
https://github.com/Yours3lf/rpi-vk-driver.git
synced 2025-03-03 03:29:17 +01:00
performance queries now seem to work
This commit is contained in:
parent
eaf884547e
commit
939b791183
@ -72,6 +72,7 @@ void clInsertNewCLMarker(ControlList* cl,
|
|||||||
marker.performResolve = performResolve;
|
marker.performResolve = performResolve;
|
||||||
marker.readMSAAimage = readMSAAimage;
|
marker.readMSAAimage = readMSAAimage;
|
||||||
marker.readMSAAdepthStencilImage = readMSAAdepthStencilImage;
|
marker.readMSAAdepthStencilImage = readMSAAdepthStencilImage;
|
||||||
|
marker.perfmonID = 0;
|
||||||
marker.handlesSize = 0;
|
marker.handlesSize = 0;
|
||||||
marker.shaderRecSize = 0;
|
marker.shaderRecSize = 0;
|
||||||
marker.uniformsSize = 0;
|
marker.uniformsSize = 0;
|
||||||
|
@ -27,6 +27,7 @@ typedef struct CLMarker
|
|||||||
uint32_t performResolve;
|
uint32_t performResolve;
|
||||||
uint32_t readMSAAimage;
|
uint32_t readMSAAimage;
|
||||||
uint32_t readMSAAdepthStencilImage;
|
uint32_t readMSAAdepthStencilImage;
|
||||||
|
void* perfmonID;
|
||||||
|
|
||||||
//pointers that point to where all the other CL data is
|
//pointers that point to where all the other CL data is
|
||||||
//plus sizes
|
//plus sizes
|
||||||
|
@ -145,6 +145,8 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkAllocateCommandBuffers(
|
|||||||
pCommandBuffers[c]->descriptorSetDirty = 1;
|
pCommandBuffers[c]->descriptorSetDirty = 1;
|
||||||
pCommandBuffers[c]->pushConstantDirty = 1;
|
pCommandBuffers[c]->pushConstantDirty = 1;
|
||||||
|
|
||||||
|
pCommandBuffers[c]->perfmonID = 0;
|
||||||
|
|
||||||
if(!pCommandBuffers[c]->binCl.buffer)
|
if(!pCommandBuffers[c]->binCl.buffer)
|
||||||
{
|
{
|
||||||
res = VK_ERROR_OUT_OF_HOST_MEMORY;
|
res = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||||
@ -481,6 +483,20 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit(
|
|||||||
submitCl.shader_rec = marker->shaderRecBuf;
|
submitCl.shader_rec = marker->shaderRecBuf;
|
||||||
submitCl.uniforms = marker->uniformsBuf;
|
submitCl.uniforms = marker->uniformsBuf;
|
||||||
|
|
||||||
|
if(marker->perfmonID)
|
||||||
|
{
|
||||||
|
uint32_t perfmonSelector = 0;
|
||||||
|
uint32_t* perfmonIDptr = (uint32_t*)marker->perfmonID;
|
||||||
|
|
||||||
|
if(pSubmits->pNext)
|
||||||
|
{
|
||||||
|
VkPerformanceQuerySubmitInfoKHR* perfQuerySubmitInfo = pSubmits->pNext;
|
||||||
|
perfmonSelector = perfQuerySubmitInfo->counterPassIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
submitCl.perfmonid = *(perfmonIDptr + perfmonSelector);
|
||||||
|
}
|
||||||
|
|
||||||
//marker not closed yet
|
//marker not closed yet
|
||||||
//close here
|
//close here
|
||||||
if(!marker->size)
|
if(!marker->size)
|
||||||
@ -589,6 +605,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkQueueSubmit(
|
|||||||
printf("clear z %u\n", submitCl.clear_z);
|
printf("clear z %u\n", submitCl.clear_z);
|
||||||
printf("clear s %u\n", submitCl.clear_s);
|
printf("clear s %u\n", submitCl.clear_s);
|
||||||
printf("flags %u\n", submitCl.flags);
|
printf("flags %u\n", submitCl.flags);
|
||||||
|
printf("perfmonID %u\n", submitCl.perfmonid);
|
||||||
/**/
|
/**/
|
||||||
|
|
||||||
|
|
||||||
|
@ -373,6 +373,12 @@ typedef struct VkCommandBuffer_T
|
|||||||
uint32_t indexBufferOffset;
|
uint32_t indexBufferOffset;
|
||||||
_buffer* indexBuffer;
|
_buffer* indexBuffer;
|
||||||
|
|
||||||
|
//Renderpass scope query must begin outside renderpass
|
||||||
|
//so there won't be any current marker...
|
||||||
|
//therefore store perfmonID here, and copy on beginrenderpass
|
||||||
|
//into marker
|
||||||
|
void* perfmonID;
|
||||||
|
|
||||||
//dirty flags used to reduce command stream clutter
|
//dirty flags used to reduce command stream clutter
|
||||||
uint32_t vertexBufferDirty;
|
uint32_t vertexBufferDirty;
|
||||||
uint32_t indexBufferDirty;
|
uint32_t indexBufferDirty;
|
||||||
|
@ -203,7 +203,7 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkGetPhysicalDeviceQueueFamilyPerformanceQueryPas
|
|||||||
assert(pPerformanceQueryCreateInfo);
|
assert(pPerformanceQueryCreateInfo);
|
||||||
assert(pNumPasses);
|
assert(pNumPasses);
|
||||||
|
|
||||||
*pNumPasses = pPerformanceQueryCreateInfo->counterIndexCount / DRM_VC4_MAX_PERF_COUNTERS;
|
*pNumPasses = pPerformanceQueryCreateInfo->counterIndexCount / DRM_VC4_MAX_PERF_COUNTERS + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -342,10 +342,12 @@ int vc4_seqno_wait(int fd, uint64_t* lastFinishedSeqno, uint64_t seqno, uint64_t
|
|||||||
if (ret != -ETIME) {
|
if (ret != -ETIME) {
|
||||||
fprintf(stderr, "Seqno wait failed: %s\n",
|
fprintf(stderr, "Seqno wait failed: %s\n",
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
|
vc4_print_hang_state(controlFd);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//Timeout happened
|
//Timeout happened
|
||||||
|
vc4_print_hang_state(controlFd);
|
||||||
*timeout_ns = -1;
|
*timeout_ns = -1;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -577,6 +579,7 @@ void vc4_cl_submit(int fd, struct drm_vc4_submit_cl* submit, uint64_t* lastEmitt
|
|||||||
|
|
||||||
if (*lastEmittedSeqno - *lastFinishedSeqno > 5) {
|
if (*lastEmittedSeqno - *lastFinishedSeqno > 5) {
|
||||||
uint64_t timeout = WAIT_TIMEOUT_INFINITE;
|
uint64_t timeout = WAIT_TIMEOUT_INFINITE;
|
||||||
|
//uint64_t timeout = 1000ull * 1000ull * 1000ull; //TODO waits too long...
|
||||||
if (!vc4_seqno_wait(fd,
|
if (!vc4_seqno_wait(fd,
|
||||||
lastFinishedSeqno,
|
lastFinishedSeqno,
|
||||||
*lastFinishedSeqno > 0 ? *lastEmittedSeqno - 5 : *lastEmittedSeqno,
|
*lastFinishedSeqno > 0 ? *lastEmittedSeqno - 5 : *lastEmittedSeqno,
|
||||||
@ -686,7 +689,7 @@ void vc4_print_hang_state(int fd)
|
|||||||
|
|
||||||
if (drmIoctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, &arg))
|
if (drmIoctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, &arg))
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Perfmon get values failed: %s\n",
|
fprintf(stderr, "vc4 get hang state failed: %s\n",
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -51,7 +51,7 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkCreateQueryPool(
|
|||||||
for(uint32_t d = 0; d < ci.counterIndexCount; d += DRM_VC4_MAX_PERF_COUNTERS)
|
for(uint32_t d = 0; d < ci.counterIndexCount; d += DRM_VC4_MAX_PERF_COUNTERS)
|
||||||
{
|
{
|
||||||
qp->queryPool[c].perfmonIDs[d / DRM_VC4_MAX_PERF_COUNTERS] = vc4_create_perfmon(controlFd, &qp->queryPool[c].enabledCounters[d], qp->queryPool[c].numEnabledCounters > DRM_VC4_MAX_PERF_COUNTERS ? DRM_VC4_MAX_PERF_COUNTERS : qp->queryPool[c].numEnabledCounters);
|
qp->queryPool[c].perfmonIDs[d / DRM_VC4_MAX_PERF_COUNTERS] = vc4_create_perfmon(controlFd, &qp->queryPool[c].enabledCounters[d], qp->queryPool[c].numEnabledCounters > DRM_VC4_MAX_PERF_COUNTERS ? DRM_VC4_MAX_PERF_COUNTERS : qp->queryPool[c].numEnabledCounters);
|
||||||
memset(&qp->queryPool[c].counterValues[d][0], 0, sizeof(uint64_t) * DRM_VC4_MAX_PERF_COUNTERS);
|
memset(&qp->queryPool[c].counterValues[d / DRM_VC4_MAX_PERF_COUNTERS][0], 0, sizeof(uint64_t) * DRM_VC4_MAX_PERF_COUNTERS);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -99,7 +99,9 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdEndQuery(
|
|||||||
assert(commandBuffer);
|
assert(commandBuffer);
|
||||||
assert(queryPool);
|
assert(queryPool);
|
||||||
|
|
||||||
//TODO
|
_commandBuffer* cmdBuf = commandBuffer;
|
||||||
|
|
||||||
|
cmdBuf->perfmonID = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBeginQuery(
|
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBeginQuery(
|
||||||
@ -111,7 +113,14 @@ VKAPI_ATTR void VKAPI_CALL rpi_vkCmdBeginQuery(
|
|||||||
assert(commandBuffer);
|
assert(commandBuffer);
|
||||||
assert(queryPool);
|
assert(queryPool);
|
||||||
|
|
||||||
//TODO
|
//TODO flags
|
||||||
|
|
||||||
|
_commandBuffer* cmdBuf = commandBuffer;
|
||||||
|
_queryPool* qp = queryPool;
|
||||||
|
|
||||||
|
|
||||||
|
//pass id will select the perfmon at submit
|
||||||
|
cmdBuf->perfmonID = qp->queryPool[query].perfmonIDs;
|
||||||
}
|
}
|
||||||
|
|
||||||
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyQueryPoolResults(
|
VKAPI_ATTR void VKAPI_CALL rpi_vkCmdCopyQueryPoolResults(
|
||||||
@ -149,14 +158,14 @@ VKAPI_ATTR VkResult VKAPI_CALL rpi_vkGetQueryPoolResults(
|
|||||||
{
|
{
|
||||||
for(uint32_t d = 0; d < qp->queryPool[c].numEnabledCounters; d += DRM_VC4_MAX_PERF_COUNTERS)
|
for(uint32_t d = 0; d < qp->queryPool[c].numEnabledCounters; d += DRM_VC4_MAX_PERF_COUNTERS)
|
||||||
{
|
{
|
||||||
vc4_perfmon_get_values(controlFd, qp->queryPool[c].perfmonIDs[d / DRM_VC4_MAX_PERF_COUNTERS], &qp->queryPool[c].counterValues[d][0]);
|
vc4_perfmon_get_values(controlFd, qp->queryPool[c].perfmonIDs[d / DRM_VC4_MAX_PERF_COUNTERS], &qp->queryPool[c].counterValues[d / DRM_VC4_MAX_PERF_COUNTERS][0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t counter = 0;
|
uint32_t counter = 0;
|
||||||
for(uint32_t d = 0; d < dataSize; d += stride, ++counter)
|
for(uint32_t d = 0; d < dataSize; d += stride, ++counter)
|
||||||
{
|
{
|
||||||
VkPerformanceCounterResultKHR* result = ((char*)pData) + d;
|
VkPerformanceCounterResultKHR* result = ((char*)pData) + d;
|
||||||
result->uint64 = qp->queryPool[c].counterValues[counter];
|
result->uint64 = qp->queryPool[c].counterValues[counter / DRM_VC4_MAX_PERF_COUNTERS][counter % DRM_VC4_MAX_PERF_COUNTERS];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -224,6 +224,8 @@ void rpi_vkCmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassB
|
|||||||
//command list.
|
//command list.
|
||||||
clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length);
|
clFit(commandBuffer, &commandBuffer->binCl, V3D21_START_TILE_BINNING_length);
|
||||||
clInsertStartTileBinning(&commandBuffer->binCl);
|
clInsertStartTileBinning(&commandBuffer->binCl);
|
||||||
|
|
||||||
|
cb->binCl.currMarker->perfmonID = cb->perfmonID;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -467,93 +467,123 @@ static VkPerformanceCounterDescriptionKHR performanceCounterDescriptions[] =
|
|||||||
{
|
{
|
||||||
{
|
{
|
||||||
.name = "FRONT_END_PIPELINE_VALID_PRIMS_NO_RENDER",
|
.name = "FRONT_END_PIPELINE_VALID_PRIMS_NO_RENDER",
|
||||||
|
.description = "FEP Valid primitives that result in no rendered pixels, for all rendered tiles"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "FRONT_END_PIPELINE_VALID_PRIMS_RENDER",
|
.name = "FRONT_END_PIPELINE_VALID_PRIMS_RENDER",
|
||||||
|
.description = "FEP Valid primitives for all rendered tiles. (primitives may be counted in more than one tile)"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "FRONT_END_PIPELINE_CLIPPED_QUADS",
|
.name = "FRONT_END_PIPELINE_CLIPPED_QUADS",
|
||||||
|
.description = "FEP Early-Z/Near/Far clipped quads"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "FRONT_END_PIPELINE_VALID_QUADS",
|
.name = "FRONT_END_PIPELINE_VALID_QUADS",
|
||||||
|
.description = "FEP Valid quads"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "TILE_BUFFER_QUADS_NOT_PASSING_STENCIL",
|
.name = "TILE_BUFFER_QUADS_NOT_PASSING_STENCIL",
|
||||||
|
.description = "TLB Quads with no pixels passing the stencil test"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "TILE_BUFFER_QUADS_NOT_PASSING_Z_AND_STENCIL",
|
.name = "TILE_BUFFER_QUADS_NOT_PASSING_Z_AND_STENCIL",
|
||||||
|
.description = "TLB Quads with no pixels passing the Z and stencil tests"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "TILE_BUFFER_QUADS_PASSING_Z_AND_STENCIL",
|
.name = "TILE_BUFFER_QUADS_PASSING_Z_AND_STENCIL",
|
||||||
|
.description = "TLB Quads with any pixels passing the Z and stencil tests"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "TILE_BUFFER_QUADS_ZERO_COVERAGE",
|
.name = "TILE_BUFFER_QUADS_ZERO_COVERAGE",
|
||||||
|
.description = "TLB Quads with all pixels having zero coverage"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "TILE_BUFFER_QUADS_NON_ZERO_COVERAGE",
|
.name = "TILE_BUFFER_QUADS_NON_ZERO_COVERAGE",
|
||||||
|
.description = "TLB Quads with any pixels having non-zero coverage"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "TILE_BUFFER_QUADS_WRITTEN_TO_COLOR_BUF",
|
.name = "TILE_BUFFER_QUADS_WRITTEN_TO_COLOR_BUF",
|
||||||
|
.description = "TLB Quads with valid pixels written to color buffer"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "PLB_PRIMS_OUTSIDE_VIEWPORT",
|
.name = "PLB_PRIMS_OUTSIDE_VIEWPORT",
|
||||||
|
.description = "PTB Primitives discarded by being outside the viewport"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "PLB_PRIMS_NEED_CLIPPING",
|
.name = "PLB_PRIMS_NEED_CLIPPING",
|
||||||
|
.description = "PTB Primitives that need clipping"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "PRIMITIVE_SETUP_ENGINE_PRIMS_REVERSED",
|
.name = "PRIMITIVE_SETUP_ENGINE_PRIMS_REVERSED",
|
||||||
|
.description = "PSE Primitives that are discarded because they are reversed"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "QUAD_PROCESSOR_UNIT_TOTAL_IDLE_CYCLES",
|
.name = "QUAD_PROCESSOR_UNIT_TOTAL_IDLE_CYCLES",
|
||||||
|
.description = "QPU Total idle clock cycles for all QPUs"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_VERTEX_COORD_SHADING",
|
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_VERTEX_COORD_SHADING",
|
||||||
|
.description = "QPU Total clock cycles for all QPUs doing vertex/coordinate shading"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_FRAGMENT_SHADING",
|
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_FRAGMENT_SHADING",
|
||||||
|
.description = "QPU Total clock cycles for all QPUs doing fragment shading"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_EXEC_VALID_INST",
|
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_EXEC_VALID_INST",
|
||||||
|
.description = "QPU Total clock cycles for all QPUs executing valid instructions"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_WAITING_TMUS",
|
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_WAITING_TMUS",
|
||||||
|
.description = "QPU Total clock cycles for all QPUs stalled waiting for TMUs"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_WAITING_SCOREBOARD",
|
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_WAITING_SCOREBOARD",
|
||||||
|
.description = "QPU Total clock cycles for all QPUs stalled waiting for Scoreboard"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_WAITING_VARYINGS",
|
.name = "QUAD_PROCESSOR_UNIT_TOTAL_CLK_CYCLES_WAITING_VARYINGS",
|
||||||
|
.description = "QPU Total clock cycles for all QPUs stalled waiting for Varyings"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "QUAD_PROCESSOR_UNIT_TOTAL_INST_CACHE_HIT",
|
.name = "QUAD_PROCESSOR_UNIT_TOTAL_INST_CACHE_HIT",
|
||||||
|
.description = "QPU Total instruction cache hits for all slices"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "QUAD_PROCESSOR_UNIT_TOTAL_INST_CACHE_MISS",
|
.name = "QUAD_PROCESSOR_UNIT_TOTAL_INST_CACHE_MISS",
|
||||||
|
.description = "QPU Total instruction cache misses for all slices"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "QUAD_PROCESSOR_UNIT_TOTAL_UNIFORM_CACHE_HIT",
|
.name = "QUAD_PROCESSOR_UNIT_TOTAL_UNIFORM_CACHE_HIT",
|
||||||
|
.description = "QPU Total uniforms cache hits for all slices"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "QUAD_PROCESSOR_UNIT_TOTAL_UNIFORM_CACHE_MISS",
|
.name = "QUAD_PROCESSOR_UNIT_TOTAL_UNIFORM_CACHE_MISS",
|
||||||
|
.description = "QPU Total uniforms cache misses for all slices"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "TEXTURE_MEMORY_LOOKUP_UNIT_TOTAL_TEXT_QUADS_PROCESSED",
|
.name = "TEXTURE_MEMORY_LOOKUP_UNIT_TOTAL_TEXT_QUADS_PROCESSED",
|
||||||
|
.description = "TMU Total texture quads processed"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "TEXTURE_MEMORY_LOOKUP_UNIT_TOTAL_TEXT_CACHE_MISS",
|
.name = "TEXTURE_MEMORY_LOOKUP_UNIT_TOTAL_TEXT_CACHE_MISS",
|
||||||
|
.description = "TMU Total texture cache misses (number of fetches from memory/L2cache)"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "VERTEX_PIPE_MEMORY_TOTAL_CLK_CYCLES_VERTEX_DMA_WRITE_STALLED",
|
.name = "VERTEX_PIPE_MEMORY_TOTAL_CLK_CYCLES_VERTEX_DMA_WRITE_STALLED",
|
||||||
|
.description = "VPM Total clock cycles VDW is stalled waiting for VPM access"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "VERTEX_PIPE_MEMORY_TOTAL_CLK_CYCLES_VERTEX_DMA_STALLED",
|
.name = "VERTEX_PIPE_MEMORY_TOTAL_CLK_CYCLES_VERTEX_DMA_STALLED",
|
||||||
|
.description = "VPM Total clock cycles VCD is stalled waiting for VPM access"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "L2C_TOTAL_L2_CACHE_HIT",
|
.name = "L2C_TOTAL_L2_CACHE_HIT",
|
||||||
|
.description = "L2C Total Level 2 cache hits"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.name = "L2C_TOTAL_L2_CACHE_MISS",
|
.name = "L2C_TOTAL_L2_CACHE_MISS",
|
||||||
|
.description = "L2C Total Level 2 cache misses"
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -735,19 +735,19 @@ void recordCommandBuffers()
|
|||||||
}
|
}
|
||||||
|
|
||||||
void draw() {
|
void draw() {
|
||||||
// Acquire image
|
|
||||||
uint32_t imageIndex;
|
|
||||||
VkResult res = vkAcquireNextImageKHR(device, swapChain, UINT64_MAX, imageAvailableSemaphore, VK_NULL_HANDLE, &imageIndex);
|
|
||||||
|
|
||||||
if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR) {
|
|
||||||
std::cerr << "failed to acquire image" << std::endl;
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "acquired image" << std::endl;
|
|
||||||
|
|
||||||
for(uint32_t c = 0; c < numQueryPasses; ++c)
|
for(uint32_t c = 0; c < numQueryPasses; ++c)
|
||||||
{
|
{
|
||||||
|
// Acquire image
|
||||||
|
uint32_t imageIndex;
|
||||||
|
VkResult res = vkAcquireNextImageKHR(device, swapChain, UINT64_MAX, imageAvailableSemaphore, VK_NULL_HANDLE, &imageIndex);
|
||||||
|
|
||||||
|
if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR) {
|
||||||
|
std::cerr << "failed to acquire image" << std::endl;
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "acquired image" << std::endl;
|
||||||
|
|
||||||
VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {};
|
VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {};
|
||||||
performanceQuerySubmitInfo.sType = VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR;
|
performanceQuerySubmitInfo.sType = VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR;
|
||||||
performanceQuerySubmitInfo.counterPassIndex = c;
|
performanceQuerySubmitInfo.counterPassIndex = c;
|
||||||
@ -770,13 +770,33 @@ void draw() {
|
|||||||
std::cerr << "failed to submit draw command buffer" << std::endl;
|
std::cerr << "failed to submit draw command buffer" << std::endl;
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout << "submitted draw command buffer" << std::endl;
|
||||||
|
|
||||||
|
my_vkReleaseProfilingLockKHR(device);
|
||||||
|
|
||||||
|
// Present drawn image
|
||||||
|
// Note: semaphore here is not strictly necessary, because commands are processed in submission order within a single queue
|
||||||
|
VkPresentInfoKHR presentInfo = {};
|
||||||
|
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
|
||||||
|
presentInfo.waitSemaphoreCount = 1;
|
||||||
|
presentInfo.pWaitSemaphores = &renderingFinishedSemaphore;
|
||||||
|
|
||||||
|
presentInfo.swapchainCount = 1;
|
||||||
|
presentInfo.pSwapchains = &swapChain;
|
||||||
|
presentInfo.pImageIndices = &imageIndex;
|
||||||
|
|
||||||
|
res = vkQueuePresentKHR(presentQueue, &presentInfo);
|
||||||
|
|
||||||
|
if (res != VK_SUCCESS) {
|
||||||
|
std::cerr << "failed to submit present command buffer" << std::endl;
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "submitted draw command buffer" << std::endl;
|
|
||||||
|
|
||||||
my_vkReleaseProfilingLockKHR(device);
|
|
||||||
|
|
||||||
{ //Get query results
|
{ //Get query results
|
||||||
|
vkQueueWaitIdle(graphicsQueue);
|
||||||
|
|
||||||
VkPerformanceCounterResultKHR* recordedCounters = (VkPerformanceCounterResultKHR*)malloc(sizeof(VkPerformanceCounterResultKHR) * counterCount);
|
VkPerformanceCounterResultKHR* recordedCounters = (VkPerformanceCounterResultKHR*)malloc(sizeof(VkPerformanceCounterResultKHR) * counterCount);
|
||||||
vkGetQueryPoolResults(device, queryPool, 0, 1, sizeof(VkPerformanceCounterResultKHR) * counterCount, recordedCounters, sizeof(VkPerformanceCounterResultKHR), 0);
|
vkGetQueryPoolResults(device, queryPool, 0, 1, sizeof(VkPerformanceCounterResultKHR) * counterCount, recordedCounters, sizeof(VkPerformanceCounterResultKHR), 0);
|
||||||
|
|
||||||
@ -792,24 +812,6 @@ void draw() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Present drawn image
|
|
||||||
// Note: semaphore here is not strictly necessary, because commands are processed in submission order within a single queue
|
|
||||||
VkPresentInfoKHR presentInfo = {};
|
|
||||||
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
|
|
||||||
presentInfo.waitSemaphoreCount = 1;
|
|
||||||
presentInfo.pWaitSemaphores = &renderingFinishedSemaphore;
|
|
||||||
|
|
||||||
presentInfo.swapchainCount = 1;
|
|
||||||
presentInfo.pSwapchains = &swapChain;
|
|
||||||
presentInfo.pImageIndices = &imageIndex;
|
|
||||||
|
|
||||||
res = vkQueuePresentKHR(presentQueue, &presentInfo);
|
|
||||||
|
|
||||||
if (res != VK_SUCCESS) {
|
|
||||||
std::cerr << "failed to submit present command buffer" << std::endl;
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "submitted presentation command buffer" << std::endl;
|
std::cout << "submitted presentation command buffer" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user