[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251104083605.13677-8-pierre-eric.pelloux-prayer@amd.com>
Date: Tue, 4 Nov 2025 09:35:22 +0100
From: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>
To: Alex Deucher <alexander.deucher@....com>,
Christian König <christian.koenig@....com>, David Airlie
<airlied@...il.com>, Simona Vetter <simona@...ll.ch>
CC: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>,
<amd-gfx@...ts.freedesktop.org>, <dri-devel@...ts.freedesktop.org>,
<linux-kernel@...r.kernel.org>
Subject: [PATCH v1 07/20] drm/amdgpu: allocate multiple clear entities
No functional change for now, as we always use entity 0.
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 +--
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 6 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 76 +++++++++++++--------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 10 +--
5 files changed, 66 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 2a444d02cf4b..e73dcfed5338 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -655,7 +655,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
struct dma_fence *fence;
struct amdgpu_job *job;
- int r;
+ int r, i;
if (!hub->sdma_invalidation_workaround || vmid ||
!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
@@ -686,8 +686,9 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* translation. Avoid this by doing the invalidation from the SDMA
* itself at least for GART.
*/
- mutex_lock(&adev->mman.clear_entity.gart_window_lock);
mutex_lock(&adev->mman.move_entity.gart_window_lock);
+ for (i = 0; i < adev->mman.num_clear_entities; i++)
+ mutex_lock(&adev->mman.clear_entities[i].gart_window_lock);
r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.default_entity.base,
AMDGPU_FENCE_OWNER_UNDEFINED,
16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
@@ -701,7 +702,8 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
fence = amdgpu_job_submit(job);
mutex_unlock(&adev->mman.move_entity.gart_window_lock);
- mutex_unlock(&adev->mman.clear_entity.gart_window_lock);
+ for (i = 0; i < adev->mman.num_clear_entities; i++)
+ mutex_unlock(&adev->mman.clear_entities[i].gart_window_lock);
dma_fence_wait(fence, false);
dma_fence_put(fence);
@@ -710,7 +712,8 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
error_alloc:
mutex_unlock(&adev->mman.move_entity.gart_window_lock);
- mutex_unlock(&adev->mman.clear_entity.gart_window_lock);
+ for (i = 0; i < adev->mman.num_clear_entities; i++)
+ mutex_unlock(&adev->mman.clear_entities[i].gart_window_lock);
dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 0760e70402ec..3771e89035f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -269,10 +269,12 @@ static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
*
* @adev: amdgpu_device pointer
* @gtt_size: maximum size of GTT
+ * @reserved_windows: number of already-reserved GART windows (excluded from the manager's range)
*
* Allocate and initialize the GTT manager.
*/
-int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size,
+ u32 reserved_windows)
{
struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
struct ttm_resource_manager *man = &mgr->manager;
@@ -283,7 +285,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
- start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
+ start = AMDGPU_GTT_MAX_TRANSFER_SIZE * reserved_windows;
size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
drm_mm_init(&mgr->mm, start, size);
spin_lock_init(&mgr->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index c06c132a753c..e7b2cae031b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1321,7 +1321,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (r)
goto out;
- r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv,
+ r = amdgpu_fill_buffer(&adev->mman.clear_entities[0], abo, 0, &bo->base._resv,
&fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
if (WARN_ON(r))
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3a77eea8869d..e0e469b73013 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1891,6 +1891,7 @@ static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev)
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
uint64_t gtt_size;
+ u32 gart_window;
int r;
dma_set_max_seg_size(adev->dev, UINT_MAX);
@@ -1923,7 +1924,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
/* Change the size here instead of the init above so only lpfn is affected */
- amdgpu_ttm_set_buffer_funcs_status(adev, false);
+ gart_window = amdgpu_ttm_set_buffer_funcs_status(adev, false);
#ifdef CONFIG_64BIT
#ifdef CONFIG_X86
if (adev->gmc.xgmi.connected_to_cpu)
@@ -2019,7 +2020,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
/* Initialize GTT memory pool */
- r = amdgpu_gtt_mgr_init(adev, gtt_size);
+ r = amdgpu_gtt_mgr_init(adev, gtt_size, gart_window);
if (r) {
dev_err(adev->dev, "Failed initializing GTT heap.\n");
return r;
@@ -2158,16 +2159,22 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
*
* Enable/disable use of buffer functions during suspend/resume. This should
* only be called at bootup or when userspace isn't running.
+ *
+ * Returns: the number of reserved GART windows
*/
-void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
+u32 amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
{
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
uint64_t size;
- int r, i;
+ int r, i, j;
+ u32 num_clear_entities, windows, w;
+
+ num_clear_entities = adev->sdma.num_instances;
+ windows = adev->gmc.is_app_apu ? 0 : (2 + num_clear_entities);
if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
- return;
+ return windows;
if (enable) {
struct amdgpu_ring *ring;
@@ -2180,19 +2187,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
1, NULL);
if (r) {
dev_err(adev->dev,
- "Failed setting up TTM BO move entity (%d)\n",
+ "Failed setting up TTM BO eviction entity (%d)\n",
r);
- return;
- }
-
- r = drm_sched_entity_init(&adev->mman.clear_entity.base,
- DRM_SCHED_PRIORITY_NORMAL, &sched,
- 1, NULL);
- if (r) {
- dev_err(adev->dev,
- "Failed setting up TTM BO clear entity (%d)\n",
- r);
- goto error_free_entity;
+ return 0;
}
r = drm_sched_entity_init(&adev->mman.move_entity.base,
@@ -2202,26 +2199,51 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
dev_err(adev->dev,
"Failed setting up TTM BO move entity (%d)\n",
r);
- drm_sched_entity_destroy(&adev->mman.clear_entity.base);
goto error_free_entity;
}
+ adev->mman.num_clear_entities = num_clear_entities;
+ adev->mman.clear_entities = kcalloc(num_clear_entities,
+ sizeof(struct amdgpu_ttm_entity),
+ GFP_KERNEL);
+ if (!adev->mman.clear_entities)
+ goto error_free_entity;
+
+ for (i = 0; i < num_clear_entities; i++) {
+ r = drm_sched_entity_init(&adev->mman.clear_entities[i].base,
+ DRM_SCHED_PRIORITY_NORMAL, &sched,
+ 1, NULL);
+ if (r) {
+ for (j = 0; j < i; j++)
+ drm_sched_entity_destroy(
+ &adev->mman.clear_entities[j].base);
+ kfree(adev->mman.clear_entities);
+ goto error_free_entity;
+ }
+ }
+
/* Statically assign GART windows to each entity. */
+ w = 0;
mutex_init(&adev->mman.default_entity.gart_window_lock);
- adev->mman.move_entity.gart_window_id0 = 0;
- adev->mman.move_entity.gart_window_id1 = 1;
+ adev->mman.move_entity.gart_window_id0 = w++;
+ adev->mman.move_entity.gart_window_id1 = w++;
mutex_init(&adev->mman.move_entity.gart_window_lock);
- /* Clearing entity doesn't use id0 */
- adev->mman.clear_entity.gart_window_id1 = 2;
- mutex_init(&adev->mman.clear_entity.gart_window_lock);
+ for (i = 0; i < num_clear_entities; i++) {
+ /* Clearing entities don't use id0 */
+ adev->mman.clear_entities[i].gart_window_id1 = w++;
+ mutex_init(&adev->mman.clear_entities[i].gart_window_lock);
+ }
+ WARN_ON(w != windows);
} else {
drm_sched_entity_destroy(&adev->mman.default_entity.base);
- drm_sched_entity_destroy(&adev->mman.clear_entity.base);
drm_sched_entity_destroy(&adev->mman.move_entity.base);
+ for (i = 0; i < num_clear_entities; i++)
+ drm_sched_entity_destroy(&adev->mman.clear_entities[i].base);
for (i = 0; i < TTM_FENCES_MAX_SLOT_COUNT; i++) {
dma_fence_put(man->pipelined_eviction.fences[i]);
man->pipelined_eviction.fences[i] = NULL;
}
+ kfree(adev->mman.clear_entities);
}
/* this just adjusts TTM size idea, which sets lpfn to the correct value */
@@ -2232,10 +2254,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
man->size = size;
adev->mman.buffer_funcs_enabled = enable;
- return;
+ return windows;
error_free_entity:
drm_sched_entity_destroy(&adev->mman.default_entity.base);
+ return 0;
}
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
@@ -2388,8 +2411,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
if (!fence)
return -EINVAL;
-
- entity = &adev->mman.clear_entity;
+ entity = &adev->mman.clear_entities[0];
*fence = dma_fence_get_stub();
amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 8b7a56737479..e7ada4605472 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -39,7 +39,6 @@
#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6)
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
-#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 3
extern const struct attribute_group amdgpu_vram_mgr_attr_group;
extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
@@ -73,8 +72,9 @@ struct amdgpu_mman {
struct mutex gtt_window_lock;
struct amdgpu_ttm_entity default_entity; /* has no gart windows */
- struct amdgpu_ttm_entity clear_entity;
struct amdgpu_ttm_entity move_entity;
+ struct amdgpu_ttm_entity *clear_entities;
+ u32 num_clear_entities;
struct amdgpu_vram_mgr vram_mgr;
struct amdgpu_gtt_mgr gtt_mgr;
@@ -134,7 +134,7 @@ struct amdgpu_copy_mem {
#define AMDGPU_COPY_FLAGS_GET(value, field) \
(((__u32)(value) >> AMDGPU_COPY_FLAGS_##field##_SHIFT) & AMDGPU_COPY_FLAGS_##field##_MASK)
-int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size, u32 reserved_windows);
void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
@@ -168,8 +168,8 @@ bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
-void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
- bool enable);
+u32 amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
+ bool enable);
int amdgpu_copy_buffer(struct amdgpu_ring *ring,
struct drm_sched_entity *entity,
uint64_t src_offset,
--
2.43.0
Powered by blists - more mailing lists