lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250701130702.416ba635@canb.auug.org.au>
Date: Tue, 1 Jul 2025 13:07:02 +1000
From: Stephen Rothwell <sfr@...b.auug.org.au>
To: Simona Vetter <simona.vetter@...ll.ch>, Dave Airlie <airlied@...hat.com>
Cc: Intel Graphics <intel-gfx@...ts.freedesktop.org>, DRI
 <dri-devel@...ts.freedesktop.org>, Alex Deucher
 <alexander.deucher@....com>, André Almeida
 <andrealmeid@...lia.com>, Christian König
 <christian.koenig@....com>, Christian König
 <ckoenig.leichtzumerken@...il.com>, Linux Kernel Mailing List
 <linux-kernel@...r.kernel.org>, Linux Next Mailing List
 <linux-next@...r.kernel.org>
Subject: linux-next: manual merge of the drm-misc tree with the drm tree

Hi all,

Today's linux-next merge of the drm-misc tree got a conflict in:

  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

between commits:

  183bccafa176 ("drm: Create a task info option for wedge events")
  a72002cb181f ("drm/amdgpu: Make use of drm_wedge_task_info")

from the drm tree and commits:

  821aacb2dcf0 ("drm/amdgpu: rework queue reset scheduler interaction")
  43ca5eb94b38 ("drm/amdgpu: move guilty handling into ring resets")
  38b20968f3d8 ("drm/amdgpu: move scheduler wqueue handling into callbacks")

from the drm-misc tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non-trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 6b4ffa9ceb7a,f0b7080dccb8..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@@ -89,11 -89,9 +89,10 @@@ static enum drm_gpu_sched_stat amdgpu_j
  {
  	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
  	struct amdgpu_job *job = to_amdgpu_job(s_job);
 +	struct drm_wedge_task_info *info = NULL;
  	struct amdgpu_task_info *ti;
  	struct amdgpu_device *adev = ring->adev;
- 	int idx;
- 	int r;
+ 	int idx, r;
  
  	if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
  		dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s",
@@@ -133,47 -133,22 +132,24 @@@
  	if (unlikely(adev->debug_disable_gpu_ring_reset)) {
  		dev_err(adev->dev, "Ring reset disabled by debug mask\n");
  	} else if (amdgpu_gpu_recovery && ring->funcs->reset) {
- 		bool is_guilty;
- 
- 		dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name);
- 		/* stop the scheduler, but don't mess with the
- 		 * bad job yet because if ring reset fails
- 		 * we'll fall back to full GPU reset.
- 		 */
- 		drm_sched_wqueue_stop(&ring->sched);
- 
- 		/* for engine resets, we need to reset the engine,
- 		 * but individual queues may be unaffected.
- 		 * check here to make sure the accounting is correct.
- 		 */
- 		if (ring->funcs->is_guilty)
- 			is_guilty = ring->funcs->is_guilty(ring);
- 		else
- 			is_guilty = true;
- 
- 		if (is_guilty)
- 			dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
- 
- 		r = amdgpu_ring_reset(ring, job->vmid);
+ 		dev_err(adev->dev, "Starting %s ring reset\n",
+ 			s_job->sched->name);
+ 		r = amdgpu_ring_reset(ring, job->vmid, NULL);
  		if (!r) {
- 			if (amdgpu_ring_sched_ready(ring))
- 				drm_sched_stop(&ring->sched, s_job);
- 			if (is_guilty) {
- 				atomic_inc(&ring->adev->gpu_reset_counter);
- 				amdgpu_fence_driver_force_completion(ring);
- 			}
- 			if (amdgpu_ring_sched_ready(ring))
- 				drm_sched_start(&ring->sched, 0);
- 			dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name);
- 			drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, info);
+ 			atomic_inc(&ring->adev->gpu_reset_counter);
+ 			dev_err(adev->dev, "Ring %s reset succeeded\n",
+ 				ring->sched.name);
+ 			drm_dev_wedged_event(adev_to_drm(adev),
 -					     DRM_WEDGE_RECOVERY_NONE);
++					     DRM_WEDGE_RECOVERY_NONE, info);
  			goto exit;
  		}
- 		dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name);
+ 		dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
  	}
+ 
  	dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
  
 +	amdgpu_vm_put_task_info(ti);
 +
  	if (amdgpu_device_should_recover_gpu(ring->adev)) {
  		struct amdgpu_reset_context reset_context;
  		memset(&reset_context, 0, sizeof(reset_context));

Content of type "application/pgp-signature" skipped

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ