[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <fcefae12-7d1e-41ac-974c-06bc88c175e6@igalia.com>
Date: Tue, 3 Dec 2024 09:22:52 -0300
From: Maíra Canal <mcanal@...lia.com>
To: Christian Gmeiner <christian.gmeiner@...il.com>,
Melissa Wen <mwen@...lia.com>,
Maarten Lankhorst <maarten.lankhorst@...ux.intel.com>,
Maxime Ripard <mripard@...nel.org>, Thomas Zimmermann <tzimmermann@...e.de>,
David Airlie <airlied@...il.com>, Simona Vetter <simona@...ll.ch>
Cc: kernel-dev@...lia.com, Christian Gmeiner <cgmeiner@...lia.com>,
dri-devel@...ts.freedesktop.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v4] drm/v3d: Add DRM_IOCTL_V3D_PERFMON_SET_GLOBAL
Hi Christian,
Thanks for your patch!
On 02/12/24 11:06, Christian Gmeiner wrote:
> From: Christian Gmeiner <cgmeiner@...lia.com>
>
> Add a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, to allow
> configuration of a global performance monitor (perfmon).
> Use the global perfmon for all jobs to ensure consistent
> performance tracking across submissions. This feature is
> needed to implement a Perfetto data source in user space.
>
> Signed-off-by: Christian Gmeiner <cgmeiner@...lia.com>
Reviewed-by: Maíra Canal <mcanal@...lia.com>
Best Regards,
- Maíra
> ---
> Changes in v4:
> - Rebased on drm-misc-next.
> - Factored out a small change as separate patch.
> - Fixed some grammar mistakes: s/job/jobs.
>
> Changes in v3:
> - Reworked commit message.
> - Refined some code comments.
> - Added missing v3d_perfmon_stop(..) call to v3d_perfmon_destroy_ioctl(..).
>
> Changes in v2:
> - Reworked commit message.
> - Removed num_perfmon counter for tracking perfmon allocations.
> - Allowed allocation of perfmons when the global perfmon is active.
> - Returned -EAGAIN for submissions with a per-job perfmon if the global perfmon is active.
> ---
> drivers/gpu/drm/v3d/v3d_drv.c | 1 +
> drivers/gpu/drm/v3d/v3d_drv.h | 8 +++++++
> drivers/gpu/drm/v3d/v3d_perfmon.c | 37 +++++++++++++++++++++++++++++++
> drivers/gpu/drm/v3d/v3d_sched.c | 14 +++++++++---
> drivers/gpu/drm/v3d/v3d_submit.c | 10 +++++++++
> include/uapi/drm/v3d_drm.h | 15 +++++++++++++
> 6 files changed, 82 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
> index fb35c5c3f1a7..8e5cacfa38d3 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.c
> +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> @@ -224,6 +224,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
> DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
> DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW),
> };
>
> static const struct drm_driver v3d_drm_driver = {
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
> index de73eefff9ac..dc1cfe2e14be 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -183,6 +183,12 @@ struct v3d_dev {
> u32 num_allocated;
> u32 pages_allocated;
> } bo_stats;
> +
> + /* To support a performance analysis tool in user space, we require
> + * a single, globally configured performance monitor (perfmon) for
> + * all jobs.
> + */
> + struct v3d_perfmon *global_perfmon;
> };
>
> static inline struct v3d_dev *
> @@ -594,6 +600,8 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv);
>
> /* v3d_sysfs.c */
> int v3d_sysfs_init(struct device *dev);
> diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c
> index b4c3708ea781..a1429b9684e0 100644
> --- a/drivers/gpu/drm/v3d/v3d_perfmon.c
> +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c
> @@ -313,6 +313,9 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data)
> if (perfmon == v3d->active_perfmon)
> v3d_perfmon_stop(v3d, perfmon, false);
>
> + /* If the global perfmon is being destroyed, set it to NULL */
> + cmpxchg(&v3d->global_perfmon, perfmon, NULL);
> +
> v3d_perfmon_put(perfmon);
>
> return 0;
> @@ -398,6 +401,9 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
> if (perfmon == v3d->active_perfmon)
> v3d_perfmon_stop(v3d, perfmon, false);
>
> + /* If the global perfmon is being destroyed, set it to NULL */
> + cmpxchg(&v3d->global_perfmon, perfmon, NULL);
> +
> v3d_perfmon_put(perfmon);
>
> return 0;
> @@ -457,3 +463,34 @@ int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data,
>
> return 0;
> }
> +
> +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file_priv)
> +{
> + struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
> + struct drm_v3d_perfmon_set_global *req = data;
> + struct v3d_dev *v3d = to_v3d_dev(dev);
> + struct v3d_perfmon *perfmon;
> +
> + if (req->flags & ~DRM_V3D_PERFMON_CLEAR_GLOBAL)
> + return -EINVAL;
> +
> + perfmon = v3d_perfmon_find(v3d_priv, req->id);
> + if (!perfmon)
> + return -EINVAL;
> +
> + /* If the request is to clear the global performance monitor */
> + if (req->flags & DRM_V3D_PERFMON_CLEAR_GLOBAL) {
> + if (!v3d->global_perfmon)
> + return -EINVAL;
> +
> + xchg(&v3d->global_perfmon, NULL);
> +
> + return 0;
> + }
> +
> + if (cmpxchg(&v3d->global_perfmon, NULL, perfmon))
> + return -EBUSY;
> +
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index 99ac4995b5a1..a6c3760da6ed 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -120,11 +120,19 @@ v3d_cpu_job_free(struct drm_sched_job *sched_job)
> static void
> v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
> {
> - if (job->perfmon != v3d->active_perfmon)
> + struct v3d_perfmon *perfmon = v3d->global_perfmon;
> +
> + if (!perfmon)
> + perfmon = job->perfmon;
> +
> + if (perfmon == v3d->active_perfmon)
> + return;
> +
> + if (perfmon != v3d->active_perfmon)
> v3d_perfmon_stop(v3d, v3d->active_perfmon, true);
>
> - if (job->perfmon && v3d->active_perfmon != job->perfmon)
> - v3d_perfmon_start(v3d, job->perfmon);
> + if (perfmon && v3d->active_perfmon != perfmon)
> + v3d_perfmon_start(v3d, perfmon);
> }
>
> static void
> diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
> index d607aa9c4ec2..9e439c9f0a93 100644
> --- a/drivers/gpu/drm/v3d/v3d_submit.c
> +++ b/drivers/gpu/drm/v3d/v3d_submit.c
> @@ -981,6 +981,11 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
> goto fail;
>
> if (args->perfmon_id) {
> + if (v3d->global_perfmon) {
> + ret = -EAGAIN;
> + goto fail_perfmon;
> + }
> +
> render->base.perfmon = v3d_perfmon_find(v3d_priv,
> args->perfmon_id);
>
> @@ -1196,6 +1201,11 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
> goto fail;
>
> if (args->perfmon_id) {
> + if (v3d->global_perfmon) {
> + ret = -EAGAIN;
> + goto fail_perfmon;
> + }
> +
> job->base.perfmon = v3d_perfmon_find(v3d_priv,
> args->perfmon_id);
> if (!job->base.perfmon) {
> diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
> index 2376c73abca1..97b1faf04fc4 100644
> --- a/include/uapi/drm/v3d_drm.h
> +++ b/include/uapi/drm/v3d_drm.h
> @@ -43,6 +43,7 @@ extern "C" {
> #define DRM_V3D_PERFMON_GET_VALUES 0x0a
> #define DRM_V3D_SUBMIT_CPU 0x0b
> #define DRM_V3D_PERFMON_GET_COUNTER 0x0c
> +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d
>
> #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
> #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
> @@ -61,6 +62,8 @@ extern "C" {
> #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu)
> #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \
> struct drm_v3d_perfmon_get_counter)
> +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \
> + struct drm_v3d_perfmon_set_global)
>
> #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01
> #define DRM_V3D_SUBMIT_EXTENSION 0x02
> @@ -766,6 +769,18 @@ struct drm_v3d_perfmon_get_counter {
> __u8 reserved[7];
> };
>
> +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001
> +
> +/**
> + * struct drm_v3d_perfmon_set_global - ioctl to define a global performance
> + * monitor that is used for all jobs. If a global performance monitor is
> + * defined, jobs with a self-defined performance monitor are not allowed.
> + */
> +struct drm_v3d_perfmon_set_global {
> + __u32 flags;
> + __u32 id;
> +};
> +
> #if defined(__cplusplus)
> }
> #endif
> --
> 2.47.1
>
Powered by blists - more mailing lists