linux-kernel - [PATCH 6/6] drm/msm: dump submits which triggered gpu hang

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [day] [month] [year] [list]

Message-Id: <20171024132256.20286-7-robdclark@gmail.com>
Date:   Tue, 24 Oct 2017 09:22:53 -0400
From:   Rob Clark <robdclark@...il.com>
To:     dri-devel@...ts.freedesktop.org
Cc:     linux-arm-msm@...r.kernel.org, freedreno@...ts.freedesktop.org,
        Jordan Crouse <jcrouse@...eaurora.org>,
        Rob Clark <robdclark@...il.com>,
        David Airlie <airlied@...ux.ie>, linux-kernel@...r.kernel.org
Subject: [PATCH 6/6] drm/msm: dump submits which triggered gpu hang

Note we need to move update_fences() to after msm_rd_dump_submit(),
otherwise the bo's referenced by the submit may no longer be valid.

Signed-off-by: Rob Clark <robdclark@...il.com>
---
 drivers/gpu/drm/msm/msm_gpu.c | 52 +++++++++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 403baea19329..939ea98908b8 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -255,34 +255,16 @@ static void recover_worker(struct work_struct *work)
 {
 	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
 	struct drm_device *dev = gpu->dev;
+	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_gem_submit *submit;
 	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
-	uint64_t fence;
 	int i;
 
-	/* Update all the rings with the latest and greatest fence */
-	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
-		struct msm_ringbuffer *ring = gpu->rb[i];
-
-		fence = ring->memptrs->fence;
-
-		/*
-		 * For the current (faulting?) ring/submit advance the fence by
-		 * one more to clear the faulting submit
-		 */
-		if (ring == cur_ring)
-			fence = fence + 1;
-
-		update_fences(gpu, ring, fence);
-	}
-
 	mutex_lock(&dev->struct_mutex);
 
-
 	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
-	fence = cur_ring->memptrs->fence + 1;
 
-	submit = find_submit(cur_ring, fence);
+	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
 	if (submit) {
 		struct task_struct *task;
 
@@ -306,11 +288,37 @@ static void recover_worker(struct work_struct *work)
 			len = get_cmdline(task, buf, sizeof(buf));
 			mutex_lock(&dev->struct_mutex);
 
-			dev_err(dev->dev, "%s: offending task: %s (%-*s)\n",
-					gpu->name, task->comm, len, buf);
+			dev_err(dev->dev, "%s: offending task: %s (%.*s)\n",
+				gpu->name, task->comm, len, buf);
+
+			msm_rd_dump_submit(priv->hangrd, submit,
+				"offending task: %s (%.*s)", task->comm,
+				len, buf);
+		} else {
+			msm_rd_dump_submit(priv->hangrd, submit, NULL);
 		}
 		rcu_read_unlock();
+	}
+
+
+	/*
+	 * Update all the rings with the latest and greatest fence.. this
+	 * needs to happen after msm_rd_dump_submit() to ensure that the
+	 * bo's referenced by the offending submit are still around.
+	 */
+	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
+		struct msm_ringbuffer *ring = gpu->rb[i];
+
+		uint32_t fence = ring->memptrs->fence;
 
+		/*
+		 * For the current (faulting?) ring/submit advance the fence by
+		 * one more to clear the faulting submit
+		 */
+		if (ring == cur_ring)
+			fence++;
+
+		update_fences(gpu, ring, fence);
 	}
 
 	if (msm_gpu_active(gpu)) {
-- 
2.13.6