[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260211102657.4180-4-pierre-eric.pelloux-prayer@amd.com>
Date: Wed, 11 Feb 2026 11:26:51 +0100
From: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>
To: Alex Deucher <alexander.deucher@....com>,
Christian König <christian.koenig@....com>, David Airlie
<airlied@...il.com>, Simona Vetter <simona@...ll.ch>
CC: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>,
<amd-gfx@...ts.freedesktop.org>, <dri-devel@...ts.freedesktop.org>,
<linux-kernel@...r.kernel.org>
Subject: [PATCH v1 4/6] drm/amdgpu: extract amdgpu_vm_lock_by_pasid from amdgpu_vm_handle_fault
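Looking up a VM by PASID and reserving its root BO is tricky to
implement correctly, and the devcoredump code is going to need it too.
Move that logic out of amdgpu_vm_handle_fault() into a new helper,
amdgpu_vm_lock_by_pasid().
Note that svm_range_restore_pages() is now called with the root BO
reserved, since the reservation is taken by the helper before the
compute-context check.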
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 84 +++++++++++++++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +
2 files changed, 57 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 807f8bcc7de5..6a5b3e148554 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2930,6 +2930,50 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return 0;
}
+/**
+ * amdgpu_vm_lock_by_pasid - look up a VM by PASID and reserve its root BO
+ * @adev: amdgpu device pointer
+ * @root: output; set to the referenced and reserved root BO of the VM on success
+ * @pasid: PASID used to look up the VM
+ * Return: the VM (caller must unreserve and unref @root), or NULL on failure.
+ */
+struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
+ struct amdgpu_bo **root, u32 pasid)
+{
+ unsigned long irqflags;
+ struct amdgpu_vm *vm;
+ int r;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ *root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL;
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+
+ if (!*root)
+ return NULL;
+
+ r = amdgpu_bo_reserve(*root, true);
+ if (r)
+ goto error_unref;
+
+ /* Re-check under the xarray lock: the PASID must still map to a VM owning this root BO */
+ xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ if (vm && vm->root.bo != *root)
+ vm = NULL;
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+ if (!vm)
+ goto error_unlock;
+
+ return vm;
+error_unlock:
+ amdgpu_bo_unreserve(*root);
+
+error_unref:
+ amdgpu_bo_unref(root);
+ return NULL;
+}
+
/**
* amdgpu_vm_handle_fault - graceful handling of VM faults.
* @adev: amdgpu device pointer
@@ -2945,50 +2989,31 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
* shouldn't be reported any more.
*/
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
- u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
- bool write_fault)
+ u32 vmid, u32 node_id, uint64_t addr,
+ uint64_t ts, bool write_fault)
{
bool is_compute_context = false;
struct amdgpu_bo *root;
- unsigned long irqflags;
uint64_t value, flags;
struct amdgpu_vm *vm;
int r;
- xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
- vm = xa_load(&adev->vm_manager.pasids, pasid);
- if (vm) {
- root = amdgpu_bo_ref(vm->root.bo);
- is_compute_context = vm->is_compute_context;
- } else {
- root = NULL;
- }
- xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
-
- if (!root)
+ vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
+ if (!vm)
return false;
+ is_compute_context = vm->is_compute_context;
+
addr /= AMDGPU_GPU_PAGE_SIZE;
- if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
- node_id, addr, ts, write_fault)) {
+ if (is_compute_context &&
+ !svm_range_restore_pages(adev, pasid, vmid, node_id, addr,
+ ts, write_fault)) {
+ amdgpu_bo_unreserve(root);
amdgpu_bo_unref(&root);
return true;
}
- r = amdgpu_bo_reserve(root, true);
- if (r)
- goto error_unref;
-
- /* Double check that the VM still exists */
- xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
- vm = xa_load(&adev->vm_manager.pasids, pasid);
- if (vm && vm->root.bo != root)
- vm = NULL;
- xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
- if (!vm)
- goto error_unlock;
-
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
AMDGPU_PTE_SYSTEM;
@@ -3027,7 +3052,6 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
if (r < 0)
dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
-error_unref:
amdgpu_bo_unref(&root);
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 139642eacdd0..2051eda55c99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -589,6 +589,9 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
bool write_fault);
+struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
+ struct amdgpu_bo **root, u32 pasid);
+
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
--
2.43.0
Powered by blists - more mailing lists