lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJZ5v0hAN0Csd6Qnc=2hNGafpbEGRVGX41LC8qrmUkoCk=whrw@mail.gmail.com>
Date: Tue, 9 Jul 2024 18:06:40 +0200
From: "Rafael J. Wysocki" <rafael@...nel.org>
To: Petr Machata <petrm@...dia.com>
Cc: "David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, 
	Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, netdev@...r.kernel.org, 
	Ido Schimmel <idosch@...dia.com>, mlxsw@...dia.com, linux-pm@...r.kernel.org, 
	Vadim Pasternak <vadimp@...dia.com>
Subject: Re: [PATCH net-next 2/3] mlxsw: core_thermal: Report valid current
 state during cooling device registration

On Mon, Jul 1, 2024 at 6:45 PM Petr Machata <petrm@...dia.com> wrote:
>
> From: Ido Schimmel <idosch@...dia.com>
>
> Commit 31a0fa0019b0 ("thermal/debugfs: Pass cooling device state to
> thermal_debug_cdev_add()") changed the thermal core to read the current
> state of the cooling device as part of the cooling device's
> registration. This is incompatible with the current implementation of
> the cooling device operations in mlxsw, leading to initialization
> failure with errors such as:
>
> mlxsw_spectrum 0000:01:00.0: Failed to register cooling device
> mlxsw_spectrum 0000:01:00.0: cannot register bus device
>
> The reason for the failure is that when the get current state operation
> is invoked the driver tries to derive the index of the cooling device by
> walking a per thermal zone array and looking for the matching cooling
> device pointer. However, the pointer is returned from the registration
> function and therefore only set in the array after the registration.
>
> The issue was later fixed by commit 1af89dedc8a5 ("thermal: core: Do not
> fail cdev registration because of invalid initial state") by not failing
> the registration of the cooling device if it cannot report a valid
> current state during registration, although drivers are responsible for
> ensuring that this will not happen.
>
> Therefore, make sure the driver is able to report a valid current state
> for the cooling device during registration by passing to the
> registration function a per cooling device private data that already has
> the cooling device index populated.
>
> Cc: linux-pm@...r.kernel.org
> Reviewed-by: Vadim Pasternak <vadimp@...dia.com>
> Signed-off-by: Ido Schimmel <idosch@...dia.com>
> Signed-off-by: Petr Machata <petrm@...dia.com>

Acked-by: Rafael J. Wysocki <rafael@...nel.org>

> ---
>  .../ethernet/mellanox/mlxsw/core_thermal.c    | 50 ++++++++++---------
>  1 file changed, 26 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
> index 5c511e1a8efa..eee3e37983ca 100644
> --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
> +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
> @@ -100,6 +100,12 @@ static const struct mlxsw_cooling_states default_cooling_states[] = {
>
>  struct mlxsw_thermal;
>
> +struct mlxsw_thermal_cooling_device {
> +       struct mlxsw_thermal *thermal;
> +       struct thermal_cooling_device *cdev;
> +       unsigned int idx;
> +};
> +
>  struct mlxsw_thermal_module {
>         struct mlxsw_thermal *parent;
>         struct thermal_zone_device *tzdev;
> @@ -123,7 +129,7 @@ struct mlxsw_thermal {
>         const struct mlxsw_bus_info *bus_info;
>         struct thermal_zone_device *tzdev;
>         int polling_delay;
> -       struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
> +       struct mlxsw_thermal_cooling_device cdevs[MLXSW_MFCR_PWMS_MAX];
>         struct thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
>         struct mlxsw_cooling_states cooling_states[MLXSW_THERMAL_NUM_TRIPS];
>         struct mlxsw_thermal_area line_cards[];
> @@ -147,7 +153,7 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
>         int i;
>
>         for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
> -               if (thermal->cdevs[i] == cdev)
> +               if (thermal->cdevs[i].cdev == cdev)
>                         return i;
>
>         /* Allow mlxsw thermal zone binding to an external cooling device */
> @@ -352,17 +358,14 @@ static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
>                                        unsigned long *p_state)
>
>  {
> -       struct mlxsw_thermal *thermal = cdev->devdata;
> +       struct mlxsw_thermal_cooling_device *mlxsw_cdev = cdev->devdata;
> +       struct mlxsw_thermal *thermal = mlxsw_cdev->thermal;
>         struct device *dev = thermal->bus_info->dev;
>         char mfsc_pl[MLXSW_REG_MFSC_LEN];
> -       int err, idx;
>         u8 duty;
> +       int err;
>
> -       idx = mlxsw_get_cooling_device_idx(thermal, cdev);
> -       if (idx < 0)
> -               return idx;
> -
> -       mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
> +       mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_cdev->idx, 0);
>         err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
>         if (err) {
>                 dev_err(dev, "Failed to query PWM duty\n");
> @@ -378,22 +381,19 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
>                                        unsigned long state)
>
>  {
> -       struct mlxsw_thermal *thermal = cdev->devdata;
> +       struct mlxsw_thermal_cooling_device *mlxsw_cdev = cdev->devdata;
> +       struct mlxsw_thermal *thermal = mlxsw_cdev->thermal;
>         struct device *dev = thermal->bus_info->dev;
>         char mfsc_pl[MLXSW_REG_MFSC_LEN];
> -       int idx;
>         int err;
>
>         if (state > MLXSW_THERMAL_MAX_STATE)
>                 return -EINVAL;
>
> -       idx = mlxsw_get_cooling_device_idx(thermal, cdev);
> -       if (idx < 0)
> -               return idx;
> -
>         /* Normalize the state to the valid speed range. */
>         state = max_t(unsigned long, MLXSW_THERMAL_MIN_STATE, state);
> -       mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
> +       mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_cdev->idx,
> +                           mlxsw_state_to_duty(state));
>         err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
>         if (err) {
>                 dev_err(dev, "Failed to write PWM duty\n");
> @@ -753,17 +753,21 @@ int mlxsw_thermal_init(struct mlxsw_core *core,
>         }
>         for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
>                 if (pwm_active & BIT(i)) {
> +                       struct mlxsw_thermal_cooling_device *mlxsw_cdev;
>                         struct thermal_cooling_device *cdev;
>
> +                       mlxsw_cdev = &thermal->cdevs[i];
> +                       mlxsw_cdev->thermal = thermal;
> +                       mlxsw_cdev->idx = i;
>                         cdev = thermal_cooling_device_register("mlxsw_fan",
> -                                                              thermal,
> +                                                              mlxsw_cdev,
>                                                                &mlxsw_cooling_ops);
>                         if (IS_ERR(cdev)) {
>                                 err = PTR_ERR(cdev);
>                                 dev_err(dev, "Failed to register cooling device\n");
>                                 goto err_thermal_cooling_device_register;
>                         }
> -                       thermal->cdevs[i] = cdev;
> +                       mlxsw_cdev->cdev = cdev;
>                 }
>         }
>
> @@ -824,8 +828,8 @@ int mlxsw_thermal_init(struct mlxsw_core *core,
>  err_thermal_zone_device_register:
>  err_thermal_cooling_device_register:
>         for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
> -               if (thermal->cdevs[i])
> -                       thermal_cooling_device_unregister(thermal->cdevs[i]);
> +               if (thermal->cdevs[i].cdev)
> +                       thermal_cooling_device_unregister(thermal->cdevs[i].cdev);
>  err_reg_write:
>  err_reg_query:
>         kfree(thermal);
> @@ -848,10 +852,8 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
>         }
>
>         for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
> -               if (thermal->cdevs[i]) {
> -                       thermal_cooling_device_unregister(thermal->cdevs[i]);
> -                       thermal->cdevs[i] = NULL;
> -               }
> +               if (thermal->cdevs[i].cdev)
> +                       thermal_cooling_device_unregister(thermal->cdevs[i].cdev);
>         }
>
>         kfree(thermal);
> --
> 2.45.0
>
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ