Message-ID: <20251011193847.1836454-23-michal.winiarski@intel.com>
Date: Sat, 11 Oct 2025 21:38:43 +0200
From: Michał Winiarski <michal.winiarski@...el.com>
To: Alex Williamson <alex.williamson@...hat.com>, Lucas De Marchi
<lucas.demarchi@...el.com>, Thomas Hellström
<thomas.hellstrom@...ux.intel.com>, Rodrigo Vivi <rodrigo.vivi@...el.com>,
Jason Gunthorpe <jgg@...pe.ca>, Yishai Hadas <yishaih@...dia.com>, Kevin Tian
<kevin.tian@...el.com>, Shameer Kolothum
<shameerali.kolothum.thodi@...wei.com>, <intel-xe@...ts.freedesktop.org>,
<linux-kernel@...r.kernel.org>, <kvm@...r.kernel.org>
CC: <dri-devel@...ts.freedesktop.org>, Matthew Brost
<matthew.brost@...el.com>, Michal Wajdeczko <michal.wajdeczko@...el.com>,
Jani Nikula <jani.nikula@...ux.intel.com>, Joonas Lahtinen
<joonas.lahtinen@...ux.intel.com>, Tvrtko Ursulin <tursulin@...ulin.net>,
David Airlie <airlied@...il.com>, Simona Vetter <simona@...ll.ch>, "Lukasz
Laguna" <lukasz.laguna@...el.com>
Subject: [PATCH 22/26] drm/xe/migrate: Add function for raw copy of VRAM and CCS
From: Lukasz Laguna <lukasz.laguna@...el.com>
Introduce a new function to copy data between VRAM and sysmem objects.
It's specifically designed for raw data copies, whereas the existing
xe_migrate_copy() is tailored for eviction and restore operations,
which involve additional logic. For instance, xe_migrate_copy() skips
CCS metadata copies on Xe2 dGPUs, as they are unnecessary in the
eviction scenario. However, in cases like VF migration, CCS metadata
has to be saved and restored in its raw form.

Additionally, xe_migrate_raw_vram_copy() allows copying not only entire
objects but also chunks of data, as well as copying the corresponding
CCS metadata to or from a dedicated buffer object, both of which are
essential for VF migration.
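A minimal usage sketch for the save direction (illustrative only, not
part of this patch; save_vram_chunk() and its parameters are
hypothetical) could look like:

  /*
   * Hypothetical caller: copy a page-aligned chunk of a VF's VRAM BO
   * into a sysmem staging BO together with its CCS metadata, then wait
   * for the last copy batch before the staging data is consumed.
   */
  static int save_vram_chunk(struct xe_bo *vram_bo, u64 offset, u64 size,
                             struct xe_bo *staging_bo, struct xe_bo *ccs_bo)
  {
          struct dma_fence *fence;
          long ret;

          fence = xe_migrate_raw_vram_copy(vram_bo, offset,
                                           staging_bo, 0, /* sysmem BO, offset */
                                           ccs_bo, 0,     /* CCS BO, offset */
                                           size, true);   /* to_sysmem */
          if (IS_ERR(fence))
                  return PTR_ERR(fence);

          ret = dma_fence_wait(fence, false);
          dma_fence_put(fence);

          return ret;
  }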
Signed-off-by: Lukasz Laguna <lukasz.laguna@...el.com>
---
drivers/gpu/drm/xe/xe_migrate.c | 214 +++++++++++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_migrate.h | 4 +
2 files changed, 217 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 7345a5b65169a..3f8804a2f4ee2 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -501,7 +501,7 @@ int xe_migrate_init(struct xe_migrate *m)
static u64 max_mem_transfer_per_pass(struct xe_device *xe)
{
- if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe))
+ if ((!IS_DGFX(xe) || IS_SRIOV_PF(xe)) && xe_device_has_flat_ccs(xe))
return MAX_CCS_LIMITED_TRANSFER;
return MAX_PREEMPTDISABLE_TRANSFER;
@@ -1142,6 +1142,218 @@ struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate)
return migrate->q;
}
+/**
+ * xe_migrate_raw_vram_copy() - Raw copy of VRAM object and corresponding CCS.
+ * @vram_bo: The VRAM buffer object.
+ * @vram_offset: The VRAM offset.
+ * @sysmem_bo: The sysmem buffer object. If copying only CCS metadata, set this
+ * to NULL.
+ * @sysmem_offset: The sysmem offset.
+ * @ccs_bo: The CCS buffer object located in sysmem. If copying of CCS metadata
+ * is not needed, set this to NULL.
+ * @ccs_offset: The CCS offset.
+ * @size: The size of VRAM chunk to copy.
+ * @to_sysmem: True to copy from VRAM to sysmem, false for opposite direction.
+ *
+ * Copies the content of a buffer object from or to VRAM. If supported and
+ * needed, it also copies corresponding CCS metadata.
+ *
+ * Return: Pointer to a dma_fence representing the last copy batch, or
+ * an error pointer on failure. If there is a failure, any copy operation
+ * started by the function call has been synced.
+ */
+struct dma_fence *xe_migrate_raw_vram_copy(struct xe_bo *vram_bo, u64 vram_offset,
+ struct xe_bo *sysmem_bo, u64 sysmem_offset,
+ struct xe_bo *ccs_bo, u64 ccs_offset,
+ u64 size, bool to_sysmem)
+{
+ struct xe_device *xe = xe_bo_device(vram_bo);
+ struct xe_tile *tile = vram_bo->tile;
+ struct xe_gt *gt = tile->primary_gt;
+ struct xe_migrate *m = tile->migrate;
+ struct dma_fence *fence = NULL;
+ struct ttm_resource *vram = vram_bo->ttm.resource, *sysmem, *ccs;
+ struct xe_res_cursor vram_it, sysmem_it, ccs_it;
+ u64 vram_L0_ofs, sysmem_L0_ofs;
+ u32 vram_L0_pt, sysmem_L0_pt;
+ u64 vram_L0, sysmem_L0;
+ bool copy_content = sysmem_bo ? true : false;
+ bool copy_ccs = ccs_bo ? true : false;
+ bool use_comp_pat = copy_content && to_sysmem &&
+ xe_device_has_flat_ccs(xe) && GRAPHICS_VER(xe) >= 20;
+ int pass = 0;
+ int err;
+
+ if (!copy_content && !copy_ccs)
+ return ERR_PTR(-EINVAL);
+
+ if (!IS_ALIGNED(vram_offset | sysmem_offset | ccs_offset | size, PAGE_SIZE))
+ return ERR_PTR(-EINVAL);
+
+ if (!xe_bo_is_vram(vram_bo))
+ return ERR_PTR(-EINVAL);
+
+ if (range_overflows(vram_offset, size, (u64)vram_bo->ttm.base.size))
+ return ERR_PTR(-EOVERFLOW);
+
+ if (copy_content) {
+ if (xe_bo_is_vram(sysmem_bo))
+ return ERR_PTR(-EINVAL);
+ if (range_overflows(sysmem_offset, size, (u64)sysmem_bo->ttm.base.size))
+ return ERR_PTR(-EOVERFLOW);
+ }
+
+ if (copy_ccs) {
+ if (xe_bo_is_vram(ccs_bo))
+ return ERR_PTR(-EINVAL);
+ if (!xe_device_has_flat_ccs(xe))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (ccs_bo->ttm.base.size < xe_device_ccs_bytes(xe, size))
+ return ERR_PTR(-EINVAL);
+ if (range_overflows(ccs_offset, (u64)xe_device_ccs_bytes(xe, size),
+ (u64)ccs_bo->ttm.base.size))
+ return ERR_PTR(-EOVERFLOW);
+ }
+
+ xe_res_first(vram, vram_offset, size, &vram_it);
+
+ if (copy_content) {
+ sysmem = sysmem_bo->ttm.resource;
+ xe_res_first_sg(xe_bo_sg(sysmem_bo), sysmem_offset, size, &sysmem_it);
+ }
+
+ if (copy_ccs) {
+ ccs = ccs_bo->ttm.resource;
+ xe_res_first_sg(xe_bo_sg(ccs_bo), ccs_offset, xe_device_ccs_bytes(xe, size),
+ &ccs_it);
+ }
+
+ while (size) {
+ u32 pte_flags = PTE_UPDATE_FLAG_IS_VRAM;
+ u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+ struct xe_sched_job *job;
+ struct xe_bb *bb;
+ u32 flush_flags = 0;
+ u32 update_idx;
+ u64 ccs_ofs, ccs_size;
+ u32 ccs_pt;
+
+ bool usm = xe->info.has_usm;
+ u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+ vram_L0 = xe_migrate_res_sizes(m, &vram_it);
+
+ if (copy_content) {
+ sysmem_L0 = xe_migrate_res_sizes(m, &sysmem_it);
+ vram_L0 = min(vram_L0, sysmem_L0);
+ }
+
+ drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, vram_L0);
+
+ pte_flags |= use_comp_pat ? PTE_UPDATE_FLAG_IS_COMP_PTE : 0;
+ batch_size += pte_update_size(m, pte_flags, vram, &vram_it, &vram_L0,
+ &vram_L0_ofs, &vram_L0_pt, 0, 0, avail_pts);
+ if (copy_content) {
+ batch_size += pte_update_size(m, 0, sysmem, &sysmem_it, &vram_L0,
+ &sysmem_L0_ofs, &sysmem_L0_pt, 0, avail_pts,
+ avail_pts);
+ }
+
+ if (copy_ccs) {
+ ccs_size = xe_device_ccs_bytes(xe, vram_L0);
+ batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs,
+ &ccs_pt, 0, copy_content ? 2 * avail_pts :
+ avail_pts, avail_pts);
+ xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
+ }
+
+ batch_size += copy_content ? EMIT_COPY_DW : 0;
+ batch_size += copy_ccs ? EMIT_COPY_CCS_DW : 0;
+
+ bb = xe_bb_new(gt, batch_size, usm);
+ if (IS_ERR(bb)) {
+ err = PTR_ERR(bb);
+ goto err_sync;
+ }
+
+ if (xe_migrate_allow_identity(vram_L0, &vram_it))
+ xe_res_next(&vram_it, vram_L0);
+ else
+ emit_pte(m, bb, vram_L0_pt, true, use_comp_pat, &vram_it, vram_L0, vram);
+
+ if (copy_content)
+ emit_pte(m, bb, sysmem_L0_pt, false, false, &sysmem_it, vram_L0, sysmem);
+
+ if (copy_ccs)
+ emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, ccs);
+
+ bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+ update_idx = bb->len;
+
+ if (copy_content)
+ emit_copy(gt, bb, to_sysmem ? vram_L0_ofs : sysmem_L0_ofs, to_sysmem ?
+ sysmem_L0_ofs : vram_L0_ofs, vram_L0, XE_PAGE_SIZE);
+
+ if (copy_ccs) {
+ emit_copy_ccs(gt, bb, to_sysmem ? ccs_ofs : vram_L0_ofs, !to_sysmem,
+ to_sysmem ? vram_L0_ofs : ccs_ofs, to_sysmem, vram_L0);
+ flush_flags = to_sysmem ? 0 : MI_FLUSH_DW_CCS;
+ }
+
+ job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, usm),
+ update_idx);
+ if (IS_ERR(job)) {
+ err = PTR_ERR(job);
+ goto err;
+ }
+
+ xe_sched_job_add_migrate_flush(job, flush_flags | MI_INVALIDATE_TLB);
+ if (!fence) {
+ err = xe_sched_job_add_deps(job, vram_bo->ttm.base.resv,
+ DMA_RESV_USAGE_BOOKKEEP);
+ if (!err && copy_content)
+ err = xe_sched_job_add_deps(job, sysmem_bo->ttm.base.resv,
+ DMA_RESV_USAGE_BOOKKEEP);
+ if (!err && copy_ccs)
+ err = xe_sched_job_add_deps(job, ccs_bo->ttm.base.resv,
+ DMA_RESV_USAGE_BOOKKEEP);
+ if (err)
+ goto err_job;
+ }
+
+ mutex_lock(&m->job_mutex);
+ xe_sched_job_arm(job);
+ dma_fence_put(fence);
+ fence = dma_fence_get(&job->drm.s_fence->finished);
+ xe_sched_job_push(job);
+
+ dma_fence_put(m->fence);
+ m->fence = dma_fence_get(fence);
+
+ mutex_unlock(&m->job_mutex);
+
+ xe_bb_free(bb, fence);
+ size -= vram_L0;
+ continue;
+
+err_job:
+ xe_sched_job_put(job);
+err:
+ xe_bb_free(bb, NULL);
+
+err_sync:
+ /* Sync partial copy if any. FIXME: under job_mutex? */
+ if (fence) {
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
+
+ return ERR_PTR(err);
+ }
+
+ return fence;
+}
+
static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
u32 size, u32 pitch)
{
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 4fad324b62535..0d8944b1cee61 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -131,6 +131,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate);
struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate);
+struct dma_fence *xe_migrate_raw_vram_copy(struct xe_bo *vram_bo, u64 vram_offset,
+ struct xe_bo *sysmem_bo, u64 sysmem_offset,
+ struct xe_bo *ccs_bo, u64 ccs_offset,
+ u64 size, bool to_sysmem);
int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
unsigned long offset, void *buf, int len,
int write);
--
2.50.1