Message-ID: <20250605183111.163594-41-robin.clark@oss.qualcomm.com>
Date: Thu, 5 Jun 2025 11:29:25 -0700
From: Rob Clark <robin.clark@....qualcomm.com>
To: dri-devel@...ts.freedesktop.org
Cc: freedreno@...ts.freedesktop.org, linux-arm-msm@...r.kernel.org,
Connor Abbott <cwabbott0@...il.com>,
Rob Clark <robin.clark@....qualcomm.com>,
Rob Clark <robdclark@...il.com>,
Abhinav Kumar <quic_abhinavk@...cinc.com>,
Dmitry Baryshkov <lumag@...nel.org>, Sean Paul <sean@...rly.run>,
Marijn Suijten <marijn.suijten@...ainline.org>,
David Airlie <airlied@...il.com>, Simona Vetter <simona@...ll.ch>,
Konrad Dybcio <konradybcio@...nel.org>,
linux-kernel@...r.kernel.org (open list)
Subject: [PATCH v6 40/40] drm/msm: Add VM_BIND throttling
A large number of (unsorted or separate) small (<2MB) mappings can cause
a lot of, probably unnecessary, prealloc pages. For example, a single 4KB
mapping will pre-allocate 3 pages for the pagetable (one each for levels
2-4, since the top level already exists), which can chew up a large
amount of unneeded memory. So add a mechanism to put an upper bound on
the number of pre-allocated pages.
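
To put numbers on that, here is a minimal sketch, assuming a 4-level
4KB-granule pagetable; the helper below is purely illustrative, not
driver code:

/*
 * Worst-case prealloc cost for scattered 4KB mappings, assuming a
 * 4-level 4KB-granule pagetable where the top level already exists:
 * each sparse mapping may need one fresh table page at levels 2-4.
 * (Hypothetical illustration only.)
 */
#define SPARSE_4K_PREALLOC 3	/* one page each for levels 2-4 */

static unsigned int worst_case_prealloc_pages(unsigned int nr_sparse_maps)
{
	/* e.g. 10000 scattered 4KB maps -> 30000 pages (~117MB) pinned */
	return nr_sparse_maps * SPARSE_4K_PREALLOC;
}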
Signed-off-by: Rob Clark <robin.clark@....qualcomm.com>
---
drivers/gpu/drm/msm/msm_gem_vma.c | 23 +++++++++++++++++++++--
drivers/gpu/drm/msm/msm_gpu.h | 3 +++
2 files changed, 24 insertions(+), 2 deletions(-)
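
For reference, a condensed sketch of the throttling pattern the patch
applies (the helper name is hypothetical; in_flight_prealloc and the
1024-page cap match the diff below). Submitters sleep until enough
previously queued prealloc pages are released, then account their own:

#include <linux/atomic.h>
#include <linux/wait.h>

/*
 * Condensed sketch of the throttling pattern (hypothetical helper, not
 * the exact driver code): block new VM_BIND jobs while too many
 * pre-allocated pagetable pages are already in flight.
 */
static int throttle_prealloc(struct msm_gpu_submitqueue *queue,
			     wait_queue_head_t *job_scheduled,
			     int needed)
{
	int ret;

	/* Sleep (interruptibly) until back under the 1024-page cap */
	ret = wait_event_interruptible(*job_scheduled,
			atomic_read(&queue->in_flight_prealloc) <= 1024);
	if (ret)
		return ret;	/* -ERESTARTSYS on signal */

	/* Account our pages; released again when the job is freed */
	atomic_add(needed, &queue->in_flight_prealloc);
	return 0;
}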
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index b6de87e5c3f7..83f6f95b4865 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -705,6 +705,8 @@ msm_vma_job_free(struct drm_sched_job *_job)
mmu->funcs->prealloc_cleanup(mmu, &job->prealloc);
+ atomic_sub(job->prealloc.count, &job->queue->in_flight_prealloc);
+
drm_sched_job_cleanup(_job);
job_foreach_bo (obj, job)
@@ -1087,10 +1089,11 @@ ops_are_same_pte(struct msm_vm_bind_op *first, struct msm_vm_bind_op *next)
* them as a single mapping. Otherwise the prealloc_count() will not realize
* they can share pagetable pages and vastly overcount.
*/
-static void
+static int
vm_bind_prealloc_count(struct msm_vm_bind_job *job)
{
struct msm_vm_bind_op *first = NULL, *last = NULL;
+ int ret;
for (int i = 0; i < job->nr_ops; i++) {
struct msm_vm_bind_op *op = &job->ops[i];
@@ -1119,6 +1122,20 @@ vm_bind_prealloc_count(struct msm_vm_bind_job *job)
/* Flush the remaining range: */
prealloc_count(job, first, last);
+
+ /*
+ * Now that we know the needed amount to pre-alloc, throttle on pending
+ * VM_BIND jobs if we already have too much pre-alloc memory in flight
+ */
+ ret = wait_event_interruptible(
+ to_msm_vm(job->vm)->sched.job_scheduled,
+ atomic_read(&job->queue->in_flight_prealloc) <= 1024);
+ if (ret)
+ return ret;
+
+ atomic_add(job->prealloc.count, &job->queue->in_flight_prealloc);
+
+ return 0;
}
/*
@@ -1389,7 +1406,9 @@ msm_ioctl_vm_bind(struct drm_device *dev, void *data, struct drm_file *file)
if (ret)
goto out_unlock;
- vm_bind_prealloc_count(job);
+ ret = vm_bind_prealloc_count(job);
+ if (ret)
+ goto out_unlock;
struct drm_exec exec;
unsigned flags = DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT;
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 31b83e9e3673..5508885d865f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -555,6 +555,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
* seqno, protected by submitqueue lock
* @idr_lock: for serializing access to fence_idr
* @lock: submitqueue lock for serializing submits on a queue
+ * @in_flight_prealloc: for VM_BIND queue, # of preallocated pgtable pages for
+ * queued VM_BIND jobs
* @ref: reference count
* @entity: the submit job-queue
*/
@@ -569,6 +571,7 @@ struct msm_gpu_submitqueue {
struct idr fence_idr;
struct spinlock idr_lock;
struct mutex lock;
+ atomic_t in_flight_prealloc;
struct kref ref;
struct drm_sched_entity *entity;
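
As background for the ops_are_same_pte() merging mentioned above, a
minimal sketch of the sharing check, assuming a 4KB granule where one
last-level table covers 2MB (hypothetical helper, not the driver's
actual implementation):

#include <linux/types.h>

/*
 * Consecutive ops inside the same 2MB-aligned window share last-level
 * pagetable pages, so counting them separately would vastly overcount
 * the pre-alloc requirement. (Illustrative sketch only.)
 */
static bool same_last_level_table(u64 first_end, u64 next_start)
{
	return (first_end >> 21) == (next_start >> 21);	/* 2MB window */
}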
--
2.49.0