Message-ID: <smbbl7zs7ix7ue33k52i225nwycnh2psxwc2ohqysxbvwubm7f@v5wilk7eulc6>
Date: Tue, 28 Oct 2025 11:46:39 +0100
From: Michał Winiarski <michal.winiarski@...el.com>
To: Michal Wajdeczko <michal.wajdeczko@...el.com>
CC: Alex Williamson <alex.williamson@...hat.com>, Lucas De Marchi
<lucas.demarchi@...el.com>, Thomas Hellström
<thomas.hellstrom@...ux.intel.com>, Rodrigo Vivi <rodrigo.vivi@...el.com>,
Jason Gunthorpe <jgg@...pe.ca>, Yishai Hadas <yishaih@...dia.com>, Kevin Tian
<kevin.tian@...el.com>, <intel-xe@...ts.freedesktop.org>,
<linux-kernel@...r.kernel.org>, <kvm@...r.kernel.org>, Matthew Brost
<matthew.brost@...el.com>, <dri-devel@...ts.freedesktop.org>, Jani Nikula
<jani.nikula@...ux.intel.com>, Joonas Lahtinen
<joonas.lahtinen@...ux.intel.com>, Tvrtko Ursulin <tursulin@...ulin.net>,
David Airlie <airlied@...il.com>, Simona Vetter <simona@...ll.ch>, "Lukasz
Laguna" <lukasz.laguna@...el.com>
Subject: Re: [PATCH v2 11/26] drm/xe/pf: Increase PF GuC Buffer Cache size
and use it for VF migration
On Thu, Oct 23, 2025 at 07:37:48PM +0200, Michal Wajdeczko wrote:
>
>
> On 10/22/2025 12:41 AM, Michał Winiarski wrote:
> > Contiguous PF GGTT VMAs can be scarce after creating VFs.
> > Increase the GuC buffer cache size to 4M for PF so that we can fit GuC
> > migration data (which currently maxes out at just under 4M) and use the
>
> but the code below still uses 8M
Yeah - it turns out we need more than 4M (I did the math on one of the
structs, but there's additional data on top of that), so let's just
stick to 8M for now.
>
> > cache instead of allocating fresh BOs.
> >
> > Signed-off-by: Michał Winiarski <michal.winiarski@...el.com>
> > ---
> > drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 46 ++++++-------------
> > drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h | 3 ++
> > drivers/gpu/drm/xe/xe_guc.c | 12 ++++-
> > 3 files changed, 28 insertions(+), 33 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> > index 4e26feb9c267f..04fad3126865c 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> > @@ -11,7 +11,7 @@
> > #include "xe_gt_sriov_pf_helpers.h"
> > #include "xe_gt_sriov_pf_migration.h"
> > #include "xe_gt_sriov_printk.h"
> > -#include "xe_guc.h"
> > +#include "xe_guc_buf.h"
> > #include "xe_guc_ct.h"
> > #include "xe_sriov.h"
> > #include "xe_sriov_migration_data.h"
> > @@ -57,73 +57,55 @@ static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid)
> >
> > /* Return: number of state dwords saved or a negative error code on failure */
> > static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid,
> > - void *buff, size_t size)
> > + void *dst, size_t size)
> > {
> > const int ndwords = size / sizeof(u32);
> > - struct xe_tile *tile = gt_to_tile(gt);
> > - struct xe_device *xe = tile_to_xe(tile);
> > 	struct xe_guc *guc = &gt->uc.guc;
> > - struct xe_bo *bo;
> > + CLASS(xe_guc_buf, buf)(&guc->buf, ndwords);
> > int ret;
> >
> > xe_gt_assert(gt, size % sizeof(u32) == 0);
> > xe_gt_assert(gt, size == ndwords * sizeof(u32));
> >
> > - bo = xe_bo_create_pin_map_novm(xe, tile,
> > - ALIGN(size, PAGE_SIZE),
> > - ttm_bo_type_kernel,
> > - XE_BO_FLAG_SYSTEM |
> > - XE_BO_FLAG_GGTT |
> > - XE_BO_FLAG_GGTT_INVALIDATE, false);
> > - if (IS_ERR(bo))
> > - return PTR_ERR(bo);
> > + if (!xe_guc_buf_is_valid(buf))
> > + return -ENOBUFS;
> > +
> > + memset(xe_guc_buf_cpu_ptr(buf), 0, size);
>
> hmm, I didn't find in the GuC spec that this buffer must be zeroed, so why bother?
That was found during testing: GuC actually expects the buffer to be
zeroed.
I'll ping folks to update the spec.
> >
> > ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE,
> > - xe_bo_ggtt_addr(bo), ndwords);
> > + xe_guc_buf_flush(buf), ndwords);
> > if (!ret)
> > ret = -ENODATA;
> > else if (ret > ndwords)
> > ret = -EPROTO;
> > else if (ret > 0)
> > - xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32));
> > + memcpy(dst, xe_guc_buf_sync_read(buf), ret * sizeof(u32));
>
> nit: given this usage, maybe one day we should add optimized variant that copies directly to dst?
>
> xe_guc_buf_sync_into(buf, dst, size);
>
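Sure, that could be a nice follow-up. Something along these lines would
already tidy up the call site (rough sketch only; xe_guc_buf_sync_into()
is the hypothetical name from your comment, just layered on top of the
existing xe_guc_buf_sync_read()):

static inline void *xe_guc_buf_sync_into(struct xe_guc_buf buf, void *dst, size_t size)
{
	/* Sync the GuC-written data back, then copy it into the caller's buffer */
	return memcpy(dst, xe_guc_buf_sync_read(buf), size);
}

A truly optimized variant would sync straight into dst without the
intermediate CPU copy, but that needs poking at the xe_guc_buf internals.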
> >
> > - xe_bo_unpin_map_no_vm(bo);
> > return ret;
> > }
> >
> > /* Return: number of state dwords restored or a negative error code on failure */
> > static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid,
> > - const void *buff, size_t size)
> > + const void *src, size_t size)
> > {
> > const int ndwords = size / sizeof(u32);
> > - struct xe_tile *tile = gt_to_tile(gt);
> > - struct xe_device *xe = tile_to_xe(tile);
> > 	struct xe_guc *guc = &gt->uc.guc;
> > - struct xe_bo *bo;
> > + CLASS(xe_guc_buf_from_data, buf)(&guc->buf, src, size);
> > int ret;
> >
> > xe_gt_assert(gt, size % sizeof(u32) == 0);
> > xe_gt_assert(gt, size == ndwords * sizeof(u32));
> >
> > - bo = xe_bo_create_pin_map_novm(xe, tile,
> > - ALIGN(size, PAGE_SIZE),
> > - ttm_bo_type_kernel,
> > - XE_BO_FLAG_SYSTEM |
> > - XE_BO_FLAG_GGTT |
> > - XE_BO_FLAG_GGTT_INVALIDATE, false);
> > - if (IS_ERR(bo))
> > - return PTR_ERR(bo);
> > -
> > - xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size);
> > + if (!xe_guc_buf_is_valid(buf))
> > + return -ENOBUFS;
> >
> > ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE,
> > - xe_bo_ggtt_addr(bo), ndwords);
> > + xe_guc_buf_flush(buf), ndwords);
> > if (!ret)
> > ret = -ENODATA;
> > else if (ret > ndwords)
> > ret = -EPROTO;
> >
> > - xe_bo_unpin_map_no_vm(bo);
> > return ret;
> > }
> >
> > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> > index e2d41750f863c..4f2f2783339c3 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> > @@ -11,6 +11,9 @@
> > struct xe_gt;
> > struct xe_sriov_migration_data;
> >
> > +/* TODO: get this information by querying GuC in the future */
> > +#define XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE SZ_8M
>
> so it's 8M or 4M ?
>
> maybe wrap that into function now
>
> u32 xe_gt_sriov_pf_migration_guc_data_size(struct xe_gt *gt)
> {
> if (xe_sriov_pf_migration_supported(gt_to_xe(gt)))
> return SZ_4M; /* TODO: ... */
> return 0;
> }
XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE disappears from this header
as a result of previous changes, so the size calculation can stay
static (no runtime helper needed).
>
> > +
> > int xe_gt_sriov_pf_migration_init(struct xe_gt *gt);
> > int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid);
> > int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid);
> > diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> > index 7c65528859ecb..cd6ab277a7876 100644
> > --- a/drivers/gpu/drm/xe/xe_guc.c
> > +++ b/drivers/gpu/drm/xe/xe_guc.c
> > @@ -24,6 +24,7 @@
> > #include "xe_gt_printk.h"
> > #include "xe_gt_sriov_vf.h"
> > #include "xe_gt_throttle.h"
> > +#include "xe_gt_sriov_pf_migration.h"
> > #include "xe_guc_ads.h"
> > #include "xe_guc_buf.h"
> > #include "xe_guc_capture.h"
> > @@ -40,6 +41,7 @@
> > #include "xe_mmio.h"
> > #include "xe_platform_types.h"
> > #include "xe_sriov.h"
> > +#include "xe_sriov_pf_migration.h"
> > #include "xe_uc.h"
> > #include "xe_uc_fw.h"
> > #include "xe_wa.h"
> > @@ -821,6 +823,14 @@ static int vf_guc_init_post_hwconfig(struct xe_guc *guc)
> > return 0;
> > }
> >
> > +static u32 guc_buf_cache_size(struct xe_guc *guc)
> > +{
> > + if (IS_SRIOV_PF(guc_to_xe(guc)) && xe_sriov_pf_migration_supported(guc_to_xe(guc)))
> > + return XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE;
>
> then
> u32 size = XE_GUC_BUF_CACHE_DEFAULT_SIZE;
>
> if (IS_SRIOV_PF(guc_to_xe(guc)))
> size += xe_gt_sriov_pf_migration_guc_data_size(guc_to_gt(guc));
>
> return size;
As the cache gets reused, we don't need to add anything to the default
(we can just replace the size with whatever the largest object going
through the cache requires).
Thanks,
-Michał
>
> > + else
> > + return XE_GUC_BUF_CACHE_DEFAULT_SIZE;
> > +}
> > +
> > /**
> > * xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
> > * @guc: The GuC object
> > @@ -860,7 +870,7 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
> > if (ret)
> > return ret;
> >
> > - ret = xe_guc_buf_cache_init(&guc->buf, XE_GUC_BUF_CACHE_DEFAULT_SIZE);
> > + ret = xe_guc_buf_cache_init(&guc->buf, guc_buf_cache_size(guc));
> > if (ret)
> > return ret;
> >
>