@@ -39,3 +39,5 @@ header.
.SH BUGS
Some GPUs report some units as busy when they aren't, such that even when
idle and not hung, it will show up as 100% busy.
+.PP
+On Haswell, reading the ring head and tail registers to determine ring busyness may hang the entire system, so ring busyness reporting is disabled by default on Haswell GPUs.
@@ -316,6 +316,7 @@ struct ring {
int head, tail, size;
uint64_t full;
int idle;
+ bool skip_ring_reads;
};
static uint32_t ring_read(struct ring *ring, uint32_t reg)
@@ -323,9 +324,14 @@ static uint32_t ring_read(struct ring *ring, uint32_t reg)
return INREG(ring->mmio + reg);
}
-static void ring_init(struct ring *ring)
+static void ring_init(struct ring *ring, uint32_t devid)
{
ring->size = (((ring_read(ring, RING_LEN) & RING_NR_PAGES) >> 12) + 1) * 4096;
+
+ if (IS_HASWELL(devid)) {
+ fprintf(stderr, "Skipping reads of head, and tail registers to avoid hangs\n");
+ ring->skip_ring_reads = true;
+ }
}
static void ring_reset(struct ring *ring)
@@ -340,6 +346,9 @@ static void ring_sample(struct ring *ring)
if (!ring->size)
return;
+ if (ring->skip_ring_reads)
+ return;
+
ring->head = ring_read(ring, RING_HEAD) & HEAD_ADDR;
ring->tail = ring_read(ring, RING_TAIL) & TAIL_ADDR;
@@ -366,6 +375,15 @@ static void ring_print(struct ring *ring, unsigned long samples_per_sec)
if (!ring->size)
return;
+ if (ring->skip_ring_reads) {
+ len = printf("%25s busy: ??%%: ", ring->name);
+ print_percentage_bar(0, len);
+ printf("%24s space: ??\?/%d\n",
+ ring->name,
+ ring->size);
+ return;
+ }
+
percent_busy = 100 - 100 * ring->idle / samples_per_sec;
len = printf("%25s busy: %3d%%: ", ring->name, percent_busy);
@@ -513,12 +531,12 @@ int main(int argc, char **argv)
/* Grab access to the registers */
intel_register_access_init(pci_dev, 0);
- ring_init(&render_ring);
+ ring_init(&render_ring, devid);
if (IS_GEN4(devid) || IS_GEN5(devid))
- ring_init(&bsd_ring);
+ ring_init(&bsd_ring, devid);
if (IS_GEN6(devid) || IS_GEN7(devid)) {
- ring_init(&bsd6_ring);
- ring_init(&blt_ring);
+ ring_init(&bsd6_ring, devid);
+ ring_init(&blt_ring, devid);
}
/* Initialize GPU stats */
Haswell has a known issue when doing concurrent MMIO reads (see the
reference below for details). This issue results in a system-wide,
unrecoverable hang. As this tool is shipped by default in the IGT package,
this is a very mean behavior to accidentally impose on the user.

This patch disables the ring head and tail register reads by default on
HSW, and prints a warning. An upcoming patch will provide an override for
the insane.

References: https://lists.freedesktop.org/archives/mesa-dev/2013-July/041692.html
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 man/intel_gpu_top.man | 2 ++
 tools/intel_gpu_top.c | 28 +++++++++++++++++++++++-----
 2 files changed, 25 insertions(+), 5 deletions(-)