[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CADnq5_O92ourfZJyEKjYh4k01fMw1MWiYQLkEQNYsx2sTXa7nQ@mail.gmail.com>
Date: Tue, 8 Nov 2022 11:11:12 -0500
From: Alex Deucher <alexdeucher@...il.com>
To: Brian Norris <briannorris@...omium.org>,
"Kim, Jonathan" <Jonathan.Kim@....com>,
"Kuehling, Felix" <Felix.Kuehling@....com>
Cc: Alex Deucher <alexander.deucher@....com>,
Christian König <christian.koenig@....com>,
Xinhui <Xinhui.Pan@....com>, amd-gfx@...ts.freedesktop.org,
linux-kernel@...r.kernel.org, dri-devel@...ts.freedesktop.org
Subject: Re: [PATCH 1/2] drm/amdgpu: Move racy global PMU list into device
On Fri, Oct 28, 2022 at 6:48 PM Brian Norris <briannorris@...omium.org> wrote:
>
> If there are multiple amdgpu devices, this list processing can be racy.
>
> We're really treating this like a per-device list, so make that explicit
> and remove the global list.
>
> Signed-off-by: Brian Norris <briannorris@...omium.org>
@Kuehling, Felix and @Kim, Jonathan, can you take a look at this patch?
Thanks,
Alex
> ---
>
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c | 12 +++++-------
> 2 files changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 0e6ddf05c23c..e968b7f2417c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1063,6 +1063,10 @@ struct amdgpu_device {
> struct work_struct reset_work;
>
> bool job_hang;
> +
> +#if IS_ENABLED(CONFIG_PERF_EVENTS)
> + struct list_head pmu_list;
> +#endif
> };
>
> static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> index 71ee361d0972..24f2055a2f23 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> @@ -23,6 +23,7 @@
>
> #include <linux/perf_event.h>
> #include <linux/init.h>
> +#include <linux/list.h>
> #include "amdgpu.h"
> #include "amdgpu_pmu.h"
>
> @@ -72,9 +73,6 @@ static ssize_t amdgpu_pmu_event_show(struct device *dev,
> amdgpu_pmu_attr->event_str, amdgpu_pmu_attr->type);
> }
>
> -static LIST_HEAD(amdgpu_pmu_list);
> -
> -
> struct amdgpu_pmu_attr {
> const char *name;
> const char *config;
> @@ -558,7 +556,7 @@ static int init_pmu_entry_by_type_and_add(struct amdgpu_pmu_entry *pmu_entry,
> pr_info("Detected AMDGPU %d Perf Events.\n", total_num_events);
>
>
> - list_add_tail(&pmu_entry->entry, &amdgpu_pmu_list);
> + list_add_tail(&pmu_entry->entry, &pmu_entry->adev->pmu_list);
>
> return 0;
> err_register:
> @@ -579,9 +577,7 @@ void amdgpu_pmu_fini(struct amdgpu_device *adev)
> {
> struct amdgpu_pmu_entry *pe, *temp;
>
> - list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) {
> - if (pe->adev != adev)
> - continue;
> + list_for_each_entry_safe(pe, temp, &adev->pmu_list, entry) {
> list_del(&pe->entry);
> perf_pmu_unregister(&pe->pmu);
> kfree(pe->pmu.attr_groups);
> @@ -623,6 +619,8 @@ int amdgpu_pmu_init(struct amdgpu_device *adev)
> int ret = 0;
> struct amdgpu_pmu_entry *pmu_entry, *pmu_entry_df;
>
> + INIT_LIST_HEAD(&adev->pmu_list);
> +
> switch (adev->asic_type) {
> case CHIP_VEGA20:
> pmu_entry_df = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_DF,
> --
> 2.38.1.273.g43a17bfeac-goog
>
Powered by blists - more mailing lists