[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a8ed8682-5546-493b-825c-3545081250bc@amd.com>
Date: Fri, 21 Nov 2025 16:02:43 +0100
From: Christian König <christian.koenig@....com>
To: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>,
Alex Deucher <alexander.deucher@....com>, David Airlie <airlied@...il.com>,
Simona Vetter <simona@...ll.ch>
Cc: amd-gfx@...ts.freedesktop.org, dri-devel@...ts.freedesktop.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 16/28] drm/amdgpu: use larger gart window when possible
On 11/21/25 11:12, Pierre-Eric Pelloux-Prayer wrote:
> Entities' gart windows are contiguous, so when copying a buffer
> and the src doesn't need a gart window, its window can be used to
> extend the dst one (and vice versa).
>
> This doubles the gart window size and reduces the number of jobs
> required.
>
> ---
> v2: pass adev instead of ring to amdgpu_ttm_needs_gart_window
> ---
>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78 ++++++++++++++++++-------
> 1 file changed, 58 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 164b49d768d8..489880b2fb8e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -177,6 +177,21 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_ttm_buffer_entit
> return amdgpu_job_submit(job);
> }
>
> +static bool amdgpu_ttm_needs_gart_window(struct amdgpu_device *adev,
> + struct ttm_resource *mem,
> + struct amdgpu_res_cursor *mm_cur,
> + bool tmz,
> + uint64_t *addr)
> +{
> + /* Map only what can't be accessed directly */
> + if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
Checking mem->start here is actually a really bad idea.
IIRC Amar was once working on patches to fix that, but never finished them.
On the other hand, that is certainly not a problem for this patch here.
> + *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
> + mm_cur->start;
> + return false;
> + }
> + return true;
> +}
> +
> /**
> * amdgpu_ttm_map_buffer - Map memory into the GART windows
> * @adev: the device being used
> @@ -185,6 +200,7 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_ttm_buffer_entit
> * @mem: memory object to map
> * @mm_cur: range to map
> * @window: which GART window to use
> + * @use_two_windows: if true, use a double window
> * @tmz: if we should setup a TMZ enabled mapping
> * @size: in number of bytes to map, out number of bytes mapped
> * @addr: resulting address inside the MC address space
> @@ -198,6 +214,7 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_device *adev,
> struct ttm_resource *mem,
> struct amdgpu_res_cursor *mm_cur,
> unsigned int window,
> + bool use_two_windows,
> bool tmz, uint64_t *size, uint64_t *addr)
> {
> unsigned int offset, num_pages, num_dw, num_bytes;
> @@ -213,13 +230,8 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_device *adev,
> if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
> return -EINVAL;
>
> - /* Map only what can't be accessed directly */
> - if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
> - *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
> - mm_cur->start;
> + if (!amdgpu_ttm_needs_gart_window(adev, mem, mm_cur, tmz, addr))
> return 0;
> - }
> -
When that is now checked outside of the function this shouldn't be necessary any more and we should be able to remove that here.
>
> /*
> * If start begins at an offset inside the page, then adjust the size
> @@ -228,7 +240,8 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_device *adev,
> offset = mm_cur->start & ~PAGE_MASK;
>
> num_pages = PFN_UP(*size + offset);
> - num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);
> + num_pages = min_t(uint32_t,
> + num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE * (use_two_windows ? 2 : 1));
Rather use two separate calls to amdgpu_ttm_map_buffer() and just increment the cursor before the second call.
>
> *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
>
> @@ -300,7 +313,9 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> struct dma_resv *resv,
> struct dma_fence **f)
> {
> + bool src_needs_gart_window, dst_needs_gart_window, use_two_gart_windows;
> struct amdgpu_res_cursor src_mm, dst_mm;
> + int src_gart_window, dst_gart_window;
> struct dma_fence *fence = NULL;
> int r = 0;
> uint32_t copy_flags = 0;
> @@ -324,18 +339,40 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> /* Never copy more than 256MiB at once to avoid a timeout */
> cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
>
> - /* Map src to window 0 and dst to window 1. */
> - r = amdgpu_ttm_map_buffer(adev, entity,
> - src->bo, src->mem, &src_mm,
> - 0, tmz, &cur_size, &from);
> - if (r)
> - goto error;
> + /* If only one direction needs a gart window to access memory, use both
> + * windows for it.
> + */
> + src_needs_gart_window =
> + amdgpu_ttm_needs_gart_window(adev, src->mem, &src_mm, tmz, &from);
> + dst_needs_gart_window =
> + amdgpu_ttm_needs_gart_window(adev, dst->mem, &dst_mm, tmz, &to);
The coding style looks a bit odd.
Regards,
Christian.
>
> - r = amdgpu_ttm_map_buffer(adev, entity,
> - dst->bo, dst->mem, &dst_mm,
> - 1, tmz, &cur_size, &to);
> - if (r)
> - goto error;
> + if (src_needs_gart_window) {
> + src_gart_window = 0;
> + use_two_gart_windows = !dst_needs_gart_window;
> + }
> + if (dst_needs_gart_window) {
> + dst_gart_window = src_needs_gart_window ? 1 : 0;
> + use_two_gart_windows = !src_needs_gart_window;
> + }
> +
> + if (src_needs_gart_window) {
> + r = amdgpu_ttm_map_buffer(adev, entity,
> + src->bo, src->mem, &src_mm,
> + src_gart_window, use_two_gart_windows,
> + tmz, &cur_size, &from);
> + if (r)
> + goto error;
> + }
> +
> + if (dst_needs_gart_window) {
> + r = amdgpu_ttm_map_buffer(adev, entity,
> + dst->bo, dst->mem, &dst_mm,
> + dst_gart_window, use_two_gart_windows,
> + tmz, &cur_size, &to);
> + if (r)
> + goto error;
> + }
>
> abo_src = ttm_to_amdgpu_bo(src->bo);
> abo_dst = ttm_to_amdgpu_bo(dst->bo);
> @@ -2434,7 +2471,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_device *adev,
>
> r = amdgpu_ttm_map_buffer(adev, entity,
> &bo->tbo, bo->tbo.resource, &cursor,
> - 1, false, &size, &addr);
> + 1, false, false, &size, &addr);
> if (r)
> goto err;
>
> @@ -2485,7 +2522,8 @@ int amdgpu_fill_buffer(struct amdgpu_device *adev,
>
> r = amdgpu_ttm_map_buffer(adev, entity,
> &bo->tbo, bo->tbo.resource, &dst,
> - 1, false, &cur_size, &to);
> + 1, false, false,
> + &cur_size, &to);
> if (r)
> goto error;
>
Powered by blists - more mailing lists