lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6b66d788-951e-4de3-8153-b4949e04e9e0@amd.com>
Date: Fri, 23 Jan 2026 16:56:05 +0100
From: Christian König <christian.koenig@....com>
To: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>,
 Alex Deucher <alexander.deucher@....com>, David Airlie <airlied@...il.com>,
 Simona Vetter <simona@...ll.ch>
Cc: amd-gfx@...ts.freedesktop.org, dri-devel@...ts.freedesktop.org,
 linux-kernel@...r.kernel.org
Subject: Re: [PATCH v5 08/10] drm/amdgpu: use larger gart window when possible

On 1/22/26 18:02, Pierre-Eric Pelloux-Prayer wrote:
> Entities' gart windows are contiguous, so when copying a buffer
> and src doesn't need a gart window, its window can be used to
> extend the dst one (and vice versa).
> 
> This doubles the gart window size and reduces the number of jobs
> required.
> 
> ---
> v2: pass adev instead of ring to amdgpu_ttm_needs_gart_window
> v4:
> - merge if conditions
> - always call amdgpu_ttm_needs_gart_window before calling
>   amdgpu_ttm_map_buffer
> ---
> 
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>

Well I'm still not very keen about all this here.

First of all it adds some complexity without much gain, e.g. the VRAM manager just falls back to 2MiB fragments too often. Arun is working on improving that, but doing that is basically just a hack.

Then I would really like to deprecate mem->start, it's basically just a leftover from when we still mapped everything into GART. We would need to clean that up first I think.

Can we postpone this change and land utilizing more SDMA engines first?

Regards,
Christian.

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 83 +++++++++++++++++--------
>  1 file changed, 57 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index e149092da8f1..7006c58a6992 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -177,6 +177,21 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_ttm_buffer_entit
>  	return amdgpu_job_submit(job);
>  }
>  
> +static bool amdgpu_ttm_needs_gart_window(struct amdgpu_device *adev,
> +					 struct ttm_resource *mem,
> +					 struct amdgpu_res_cursor *mm_cur,
> +					 bool tmz,
> +					 uint64_t *addr)
> +{
> +	/* Map only what can't be accessed directly */
> +	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
> +		*addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
> +			mm_cur->start;
> +		return false;
> +	}
> +	return true;
> +}
> +
>  /**
>   * amdgpu_ttm_map_buffer - Map memory into the GART windows
>   * @entity: entity to run the window setup job
> @@ -184,18 +199,22 @@ amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_ttm_buffer_entit
>   * @mem: memory object to map
>   * @mm_cur: range to map
>   * @window: which GART window to use
> + * @use_two_windows: if true, use a double window
>   * @tmz: if we should setup a TMZ enabled mapping
>   * @size: in number of bytes to map, out number of bytes mapped
>   * @addr: resulting address inside the MC address space
>   *
>   * Setup one of the GART windows to access a specific piece of memory or return
>   * the physical address for local memory.
> + * amdgpu_ttm_needs_gart_window() should be used to check if calling this
> + * function is required.
>   */
>  static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
>  				 struct ttm_buffer_object *bo,
>  				 struct ttm_resource *mem,
>  				 struct amdgpu_res_cursor *mm_cur,
>  				 unsigned int window,
> +				 bool use_two_windows,
>  				 bool tmz, uint64_t *size, uint64_t *addr)
>  {
>  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> @@ -212,14 +231,6 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
>  	if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
>  		return -EINVAL;
>  
> -	/* Map only what can't be accessed directly */
> -	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
> -		*addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
> -			mm_cur->start;
> -		return 0;
> -	}
> -
> -
>  	/*
>  	 * If start begins at an offset inside the page, then adjust the size
>  	 * and addr accordingly
> @@ -227,7 +238,8 @@ static int amdgpu_ttm_map_buffer(struct amdgpu_ttm_buffer_entity *entity,
>  	offset = mm_cur->start & ~PAGE_MASK;
>  
>  	num_pages = PFN_UP(*size + offset);
> -	num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);
> +	num_pages = min_t(uint32_t,
> +		num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE * (use_two_windows ? 2 : 1));
>  
>  	*size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
>  
> @@ -299,6 +311,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>  				      struct dma_resv *resv,
>  				      struct dma_fence **f)
>  {
> +	bool src_needs_gart_window, dst_needs_gart_window;
>  	struct amdgpu_res_cursor src_mm, dst_mm;
>  	struct dma_fence *fence = NULL;
>  	int r = 0;
> @@ -323,16 +336,29 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>  		/* Never copy more than 256MiB at once to avoid a timeout */
>  		cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
>  
> -		/* Map src to window 0 and dst to window 1. */
> -		r = amdgpu_ttm_map_buffer(entity, src->bo, src->mem, &src_mm,
> -					  0, tmz, &cur_size, &from);
> -		if (r)
> -			goto error;
> +		/* If only one direction needs a gart window to access memory, use both
> +		 * windows for it.
> +		 */
> +		src_needs_gart_window =
> +			amdgpu_ttm_needs_gart_window(adev, src->mem, &src_mm, tmz, &from);
> +		dst_needs_gart_window =
> +			amdgpu_ttm_needs_gart_window(adev, dst->mem, &dst_mm, tmz, &to);
>  
> -		r = amdgpu_ttm_map_buffer(entity, dst->bo, dst->mem, &dst_mm,
> -					  1, tmz, &cur_size, &to);
> -		if (r)
> -			goto error;
> +		if (src_needs_gart_window) {
> +			r = amdgpu_ttm_map_buffer(entity, src->bo, src->mem, &src_mm,
> +						  0, !dst_needs_gart_window,
> +						  tmz, &cur_size, &from);
> +			if (r)
> +				goto error;
> +		}
> +		if (dst_needs_gart_window) {
> +			r = amdgpu_ttm_map_buffer(entity, dst->bo, dst->mem, &dst_mm,
> +						  src_needs_gart_window ? 1 : 0,
> +						  !src_needs_gart_window,
> +						  tmz, &cur_size, &to);
> +			if (r)
> +				goto error;
> +		}
>  
>  		abo_src = ttm_to_amdgpu_bo(src->bo);
>  		abo_dst = ttm_to_amdgpu_bo(dst->bo);
> @@ -2579,10 +2605,12 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
>  		/* Never clear more than 256MiB at once to avoid timeouts */
>  		size = min(cursor.size, 256ULL << 20);
>  
> -		r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &cursor,
> -					  0, false, &size, &addr);
> -		if (r)
> -			goto err;
> +		if (amdgpu_ttm_needs_gart_window(adev, bo->tbo.resource, &cursor, false, &addr)) {
> +			r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &cursor,
> +						  0, false, false, &size, &addr);
> +			if (r)
> +				goto err;
> +		}
>  
>  		r = amdgpu_ttm_fill_mem(adev, entity, 0, addr, size, resv,
>  					&next, true,
> @@ -2629,10 +2657,13 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity,
>  		/* Never fill more than 256MiB at once to avoid timeouts */
>  		cur_size = min(dst.size, 256ULL << 20);
>  
> -		r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst,
> -					  0, false, &cur_size, &to);
> -		if (r)
> -			goto error;
> +		if (amdgpu_ttm_needs_gart_window(adev, bo->tbo.resource, &dst, false, &to)) {
> +			r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst,
> +						  0, false, false,
> +						  &cur_size, &to);
> +			if (r)
> +				goto error;
> +		}
>  
>  		r = amdgpu_ttm_fill_mem(adev, entity,
>  					src_data, to, cur_size, resv,


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ