linux-kernel - [PATCH v1 19/20] drm/amdgpu: use larger gart window when possible

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251104083605.13677-20-pierre-eric.pelloux-prayer@amd.com>
Date: Tue, 4 Nov 2025 09:35:34 +0100
From: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>
To: Alex Deucher <alexander.deucher@....com>,
	Christian König <christian.koenig@....com>, David Airlie
	<airlied@...il.com>, Simona Vetter <simona@...ll.ch>
CC: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>,
	<amd-gfx@...ts.freedesktop.org>, <dri-devel@...ts.freedesktop.org>,
	<linux-kernel@...r.kernel.org>
Subject: [PATCH v1 19/20] drm/amdgpu: use larger gart window when possible

Entities' gart windows are contiguous so when copying a buffer
and src doesn't need a gart window, its window can be used to
extend dst one (and vice versa).

This doubles the gart window size and reduces the number of jobs
required.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@....com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 85 ++++++++++++++++++-------
 1 file changed, 63 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ac2857314d68..3bfdce1ef5c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -162,6 +162,23 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 	*placement = abo->placement;
 }
 
+static bool amdgpu_ttm_needs_gart_window(struct amdgpu_ring *ring,
+					 struct ttm_resource *mem,
+					 struct amdgpu_res_cursor *mm_cur,
+					 bool tmz,
+					 uint64_t *addr)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* Map only what can't be accessed directly */
+	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
+		*addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
+			mm_cur->start;
+		return false;
+	}
+	return true;
+}
+
 /**
  * amdgpu_ttm_map_buffer - Map memory into the GART windows
  * @entity: entity to run the window setup job
@@ -169,6 +186,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
  * @mem: memory object to map
  * @mm_cur: range to map
  * @window: which GART window to use
+ * @use_two_windows: if true, use a double window
  * @ring: DMA ring to use for the copy
  * @tmz: if we should setup a TMZ enabled mapping
  * @size: in number of bytes to map, out number of bytes mapped
@@ -182,7 +200,9 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
 				 struct ttm_buffer_object *bo,
 				 struct ttm_resource *mem,
 				 struct amdgpu_res_cursor *mm_cur,
-				 unsigned int window, struct amdgpu_ring *ring,
+				 unsigned int window,
+				 bool use_two_windows,
+				 struct amdgpu_ring *ring,
 				 bool tmz, uint64_t *size, uint64_t *addr,
 				 struct dma_fence *dep,
 				 struct dma_resv *resv)
@@ -202,13 +222,8 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
 	if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
 		return -EINVAL;
 
-	/* Map only what can't be accessed directly */
-	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
-		*addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
-			mm_cur->start;
+	if (!amdgpu_ttm_needs_gart_window(ring, mem, mm_cur, tmz, addr))
 		return 0;
-	}
-
 
 	/*
 	 * If start begins at an offset inside the page, then adjust the size
@@ -217,7 +232,8 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity,
 	offset = mm_cur->start & ~PAGE_MASK;
 
 	num_pages = PFN_UP(*size + offset);
-	num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);
+	num_pages = min_t(uint32_t,
+		num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE * (use_two_windows ? 2 : 1));
 
 	*size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
 
@@ -308,8 +324,10 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 				      struct dma_resv *resv,
 				      struct dma_fence **f)
 {
+	bool src_needs_gart_window, dst_needs_gart_window, use_two_gart_windows;
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_rings[0];
 	struct amdgpu_res_cursor src_mm, dst_mm;
+	int src_gart_window, dst_gart_window;
 	struct dma_fence *fence = NULL;
 	int r = 0;
 	uint32_t copy_flags = 0;
@@ -333,20 +351,43 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 		/* Never copy more than 256MiB at once to avoid a timeout */
 		cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
 
-		/* Map src to window 0 and dst to window 1. */
-		r = amdgpu_ttm_map_buffer(&entity->base,
-					  src->bo, src->mem, &src_mm,
-					  entity->gart_window_id0, ring, tmz, &cur_size, &from,
-					  NULL, NULL);
-		if (r)
-			goto error;
+		/* If only one direction needs a gart window to access memory, use both
+		 * windows for it.
+		 */
+		src_needs_gart_window =
+			amdgpu_ttm_needs_gart_window(ring, src->mem, &src_mm, tmz, &from);
+		dst_needs_gart_window =
+			amdgpu_ttm_needs_gart_window(ring, dst->mem, &dst_mm, tmz, &to);
 
-		r = amdgpu_ttm_map_buffer(&entity->base,
-					  dst->bo, dst->mem, &dst_mm,
-					  entity->gart_window_id1, ring, tmz, &cur_size, &to,
-					  NULL, NULL);
-		if (r)
-			goto error;
+		if (src_needs_gart_window) {
+			src_gart_window = entity->gart_window_id0;
+			use_two_gart_windows = !dst_needs_gart_window;
+		}
+		if (dst_needs_gart_window) {
+			dst_gart_window = src_needs_gart_window ?
+				entity->gart_window_id1 : entity->gart_window_id0;
+			use_two_gart_windows = !src_needs_gart_window;
+		}
+
+		if (src_needs_gart_window) {
+			r = amdgpu_ttm_map_buffer(&entity->base,
+						  src->bo, src->mem, &src_mm,
+						  src_gart_window, use_two_gart_windows,
+						  ring, tmz, &cur_size, &from,
+						  NULL, NULL);
+			if (r)
+				goto error;
+		}
+
+		if (dst_needs_gart_window) {
+			r = amdgpu_ttm_map_buffer(&entity->base,
+						  dst->bo, dst->mem, &dst_mm,
+						  dst_gart_window, use_two_gart_windows,
+						  ring, tmz, &cur_size, &to,
+						  NULL, NULL);
+			if (r)
+				goto error;
+		}
 
 		abo_src = ttm_to_amdgpu_bo(src->bo);
 		abo_dst = ttm_to_amdgpu_bo(dst->bo);
@@ -2506,7 +2547,7 @@ int amdgpu_clear_buffer(struct amdgpu_ttm_entity *entity,
 
 		r = amdgpu_ttm_map_buffer(&entity->base,
 					  &bo->tbo, bo->tbo.resource, &dst,
-					  entity->gart_window_id1, ring, false,
+					  entity->gart_window_id1, false, ring, false,
 					  &cur_size, &to,
 					  dependency,
 					  resv);
-- 
2.43.0