Message-ID: <20251121101315.3585-17-pierre-eric.pelloux-prayer@amd.com>
Date: Fri, 21 Nov 2025 11:12:26 +0100
From: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>
To: Alex Deucher <alexander.deucher@....com>,
Christian König <christian.koenig@....com>, David Airlie
<airlied@...il.com>, Simona Vetter <simona@...ll.ch>
CC: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>,
<amd-gfx@...ts.freedesktop.org>, <dri-devel@...ts.freedesktop.org>,
<linux-kernel@...r.kernel.org>
Subject: [PATCH v3 16/28] drm/amdgpu: use larger gart window when possible
An entity's GART windows are contiguous, so when copying a buffer
where src doesn't need a GART window, src's window can be used to
extend dst's window (and vice versa).

This doubles the effective GART window size and reduces the number
of jobs required.
Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>
---
v2: pass adev instead of ring to amdgpu_ttm_needs_gart_window
---
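Not part of the patch: for reviewers, a standalone sketch of the
window-assignment rule implemented below. The names (copy_side,
assign_windows, needs_window) are hypothetical stand-ins for the
amdgpu structures and for amdgpu_ttm_needs_gart_window(); it only
illustrates how the second window is donated to whichever side is
alone in needing one.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for one side (src or dst) of the copy. */
struct copy_side {
	bool needs_window; /* must be mapped through a GART window? */
	int window;        /* assigned window index, -1 if unmapped */
};

/*
 * Assign GART windows 0 and 1. An entity's windows are contiguous,
 * so a side that is alone in needing a window can span both of
 * them, doubling the bytes mapped per job.
 */
static bool assign_windows(struct copy_side *src, struct copy_side *dst)
{
	bool two_windows = false;

	src->window = -1;
	dst->window = -1;

	if (src->needs_window) {
		src->window = 0;
		two_windows = !dst->needs_window;
	}
	if (dst->needs_window) {
		dst->window = src->needs_window ? 1 : 0;
		two_windows = !src->needs_window;
	}
	return two_windows;
}

int main(void)
{
	/* Example: only dst needs to be mapped through a window. */
	struct copy_side src = { .needs_window = false, .window = -1 };
	struct copy_side dst = { .needs_window = true,  .window = -1 };
	bool two = assign_windows(&src, &dst);

	printf("src: %d, dst: %d, double window: %s\n",
	       src.window, dst.window, two ? "yes" : "no");
	/* Prints: src: -1, dst: 0, double window: yes */
	return 0;
}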
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78 ++++++++++++++++++-------
1 file changed, 58 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 164b49d768d8..489880b2fb8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -177,6 +177,21 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_ttm_buffer_entit
return amdgpu_job_submit(job);
}
+static bool amdgpu_ttm_needs_gart_window(struct amdgpu_device *adev,
+ struct ttm_resource *mem,
+ struct amdgpu_res_cursor *mm_cur,
+ bool tmz,
+ uint64_t *addr)
+{
+ /* Map only what can't be accessed directly */
+ if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
+ *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
+ mm_cur->start;
+ return false;
+ }
+ return true;
+}
+
/**
* amdgpu_ttm_map_buffer - Map memory into the GART windows
* @adev: the device being used
@@ -185,6 +200,7 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_ttm_buffer_entit
* @mem: memory object to map
* @mm_cur: range to map
* @window: which GART window to use
+ * @use_two_windows: if true, treat two contiguous windows as one double-sized window
* @tmz: if we should setup a TMZ enabled mapping
* @size: in number of bytes to map, out number of bytes mapped
* @addr: resulting address inside the MC address space
@@ -198,6 +214,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_device *adev,
struct ttm_resource *mem,
struct amdgpu_res_cursor *mm_cur,
unsigned int window,
+ bool use_two_windows,
bool tmz, uint64_t *size, uint64_t *addr)
{
unsigned int offset, num_pages, num_dw, num_bytes;
@@ -213,13 +230,8 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_device *adev,
if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
return -EINVAL;
- /* Map only what can't be accessed directly */
- if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
- *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
- mm_cur->start;
+ if (!amdgpu_ttm_needs_gart_window(adev, mem, mm_cur, tmz, addr))
return 0;
- }
-
/*
* If start begins at an offset inside the page, then adjust the size
@@ -228,7 +240,8 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_device *adev,
offset = mm_cur->start & ~PAGE_MASK;
num_pages = PFN_UP(*size + offset);
- num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);
+ num_pages = min_t(uint32_t,
+ num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE * (use_two_windows ? 2 : 1));
*size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
@@ -300,7 +313,9 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
struct dma_resv *resv,
struct dma_fence **f)
{
+ bool src_needs_gart_window, dst_needs_gart_window, use_two_gart_windows;
struct amdgpu_res_cursor src_mm, dst_mm;
+ int src_gart_window, dst_gart_window;
struct dma_fence *fence = NULL;
int r = 0;
uint32_t copy_flags = 0;
@@ -324,18 +339,40 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
/* Never copy more than 256MiB at once to avoid a timeout */
cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
- /* Map src to window 0 and dst to window 1. */
- r = amdgpu_ttm_map_buffer(adev, entity,
- src->bo, src->mem, &src_mm,
- 0, tmz, &cur_size, &from);
- if (r)
- goto error;
+ /* If only one direction needs a gart window to access memory, use both
+ * windows for it.
+ */
+ src_needs_gart_window =
+ amdgpu_ttm_needs_gart_window(adev, src->mem, &src_mm, tmz, &from);
+ dst_needs_gart_window =
+ amdgpu_ttm_needs_gart_window(adev, dst->mem, &dst_mm, tmz, &to);
- r = amdgpu_ttm_map_buffer(adev, entity,
- dst->bo, dst->mem, &dst_mm,
- 1, tmz, &cur_size, &to);
- if (r)
- goto error;
+ if (src_needs_gart_window) {
+ src_gart_window = 0;
+ use_two_gart_windows = !dst_needs_gart_window;
+ }
+ if (dst_needs_gart_window) {
+ dst_gart_window = src_needs_gart_window ? 1 : 0;
+ use_two_gart_windows = !src_needs_gart_window;
+ }
+
+ if (src_needs_gart_window) {
+ r = amdgpu_ttm_map_buffer(adev, entity,
+ src->bo, src->mem, &src_mm,
+ src_gart_window, use_two_gart_windows,
+ tmz, &cur_size, &from);
+ if (r)
+ goto error;
+ }
+
+ if (dst_needs_gart_window) {
+ r = amdgpu_ttm_map_buffer(adev, entity,
+ dst->bo, dst->mem, &dst_mm,
+ dst_gart_window, use_two_gart_windows,
+ tmz, &cur_size, &to);
+ if (r)
+ goto error;
+ }
abo_src = ttm_to_amdgpu_bo(src->bo);
abo_dst = ttm_to_amdgpu_bo(dst->bo);
@@ -2434,7 +2471,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_device *adev,
r = amdgpu_ttm_map_buffer(adev, entity,
&bo->tbo, bo->tbo.resource, &cursor,
- 1, false, &size, &addr);
+ 1, false, false, &size, &addr);
if (r)
goto err;
@@ -2485,7 +2522,8 @@ int amdgpu_fill_buffer(struct amdgpu_device *adev,
r = amdgpu_ttm_map_buffer(adev, entity,
&bo->tbo, bo->tbo.resource, &dst,
- 1, false, &cur_size, &to);
+ 1, false, false,
+ &cur_size, &to);
if (r)
goto error;
--
2.43.0