[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <26681360-45a5-cfcc-e1f0-1c2e16fafca5@linaro.org>
Date: Sat, 1 Apr 2023 22:28:52 +0200
From: Daniel Lezcano <daniel.lezcano@...aro.org>
To: Petr Machata <petrm@...dia.com>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, netdev@...r.kernel.org
Cc: "Rafael J. Wysocki" <rafael@...nel.org>,
Ido Schimmel <idosch@...dia.com>,
Vadim Pasternak <vadimp@...dia.com>, mlxsw@...dia.com
Subject: Re: [PATCH net-next 1/3] mlxsw: core_thermal: Use static trip points
for transceiver modules
On 31/03/2023 16:17, Petr Machata wrote:
> From: Ido Schimmel <idosch@...dia.com>
>
> The driver registers a thermal zone for each transceiver module and
> tries to set the trip point temperatures according to the thresholds
> read from the transceiver. If a threshold cannot be read or if a
> transceiver is unplugged, the trip point temperature is set to zero,
> which means that it is disabled as far as the thermal subsystem is
> concerned.
>
> A recent change in the thermal core made it so that such trip points are
> no longer marked as disabled, which led the thermal subsystem to
> incorrectly set the associated cooling devices to their maximum
> state [1]. A fix to restore this behavior was merged in commit
> f1b80a3878b2 ("thermal: core: Restore behavior regarding invalid trip
> points"). However, the thermal maintainer suggested to not rely on this
> behavior and instead always register a valid array of trip points [2].
>
> Therefore, create a static array of trip points with sane defaults
> (suggested by Vadim) and register it with the thermal zone of each
> transceiver module. User space can choose to override these defaults
> using the thermal zone sysfs interface since these files are writeable.
>
> Before:
>
> $ cat /sys/class/thermal/thermal_zone11/type
> mlxsw-module11
> $ cat /sys/class/thermal/thermal_zone11/trip_point_*_temp
> 65000
> 75000
> 80000
>
> After:
>
> $ cat /sys/class/thermal/thermal_zone11/type
> mlxsw-module11
> $ cat /sys/class/thermal/thermal_zone11/trip_point_*_temp
> 55000
> 65000
> 80000
>
> Also tested by reverting commit f1b80a3878b2 ("thermal: core: Restore
> behavior regarding invalid trip points") and making sure that the
> associated cooling devices are not set to their maximum state.
>
> [1] https://lore.kernel.org/linux-pm/ZA3CFNhU4AbtsP4G@shredder/
> [2] https://lore.kernel.org/linux-pm/f78e6b70-a963-c0ca-a4b2-0d4c6aeef1fb@linaro.org/
>
> Signed-off-by: Ido Schimmel <idosch@...dia.com>
> Reviewed-by: Vadim Pasternak <vadimp@...dia.com>
> Signed-off-by: Petr Machata <petrm@...dia.com>
> ---
Sounds like the changes result in a nice cleanup :)
Thanks for taking care of these changes.
-- Daniel
> .../ethernet/mellanox/mlxsw/core_thermal.c | 110 ++++--------------
> 1 file changed, 25 insertions(+), 85 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
> index 09ed6e5fa6c3..ece5075b7dbf 100644
> --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
> +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
> @@ -19,6 +19,9 @@
> #define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */
> #define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */
> #define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */
> +#define MLXSW_THERMAL_MODULE_TEMP_NORM 55000 /* 55C */
> +#define MLXSW_THERMAL_MODULE_TEMP_HIGH 65000 /* 65C */
> +#define MLXSW_THERMAL_MODULE_TEMP_HOT 80000 /* 80C */
> #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */
> #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
> #define MLXSW_THERMAL_MAX_STATE 10
> @@ -30,12 +33,6 @@ static char * const mlxsw_thermal_external_allowed_cdev[] = {
> "mlxreg_fan",
> };
>
> -enum mlxsw_thermal_trips {
> - MLXSW_THERMAL_TEMP_TRIP_NORM,
> - MLXSW_THERMAL_TEMP_TRIP_HIGH,
> - MLXSW_THERMAL_TEMP_TRIP_HOT,
> -};
> -
> struct mlxsw_cooling_states {
> int min_state;
> int max_state;
> @@ -59,6 +56,24 @@ static const struct thermal_trip default_thermal_trips[] = {
> },
> };
>
> +static const struct thermal_trip default_thermal_module_trips[] = {
> + { /* In range - 0-40% PWM */
> + .type = THERMAL_TRIP_ACTIVE,
> + .temperature = MLXSW_THERMAL_MODULE_TEMP_NORM,
> + .hysteresis = MLXSW_THERMAL_HYSTERESIS_TEMP,
> + },
> + {
> + /* In range - 40-100% PWM */
> + .type = THERMAL_TRIP_ACTIVE,
> + .temperature = MLXSW_THERMAL_MODULE_TEMP_HIGH,
> + .hysteresis = MLXSW_THERMAL_HYSTERESIS_TEMP,
> + },
> + { /* Warning */
> + .type = THERMAL_TRIP_HOT,
> + .temperature = MLXSW_THERMAL_MODULE_TEMP_HOT,
> + },
> +};
> +
> static const struct mlxsw_cooling_states default_cooling_states[] = {
> {
> .min_state = 0,
> @@ -140,63 +155,6 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
> return -ENODEV;
> }
>
> -static void
> -mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
> -{
> - tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = 0;
> - tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temperature = 0;
> - tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temperature = 0;
> -}
> -
> -static int
> -mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
> - struct mlxsw_thermal_module *tz,
> - int crit_temp, int emerg_temp)
> -{
> - int err;
> -
> - /* Do not try to query temperature thresholds directly from the module's
> - * EEPROM if we got valid thresholds from MTMP.
> - */
> - if (!emerg_temp || !crit_temp) {
> - err = mlxsw_env_module_temp_thresholds_get(core, tz->slot_index,
> - tz->module,
> - SFP_TEMP_HIGH_WARN,
> - &crit_temp);
> - if (err)
> - return err;
> -
> - err = mlxsw_env_module_temp_thresholds_get(core, tz->slot_index,
> - tz->module,
> - SFP_TEMP_HIGH_ALARM,
> - &emerg_temp);
> - if (err)
> - return err;
> - }
> -
> - if (crit_temp > emerg_temp) {
> - dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n",
> - tz->tzdev->type, crit_temp, emerg_temp);
> - return 0;
> - }
> -
> - /* According to the system thermal requirements, the thermal zones are
> - * defined with three trip points. The critical and emergency
> - * temperature thresholds, provided by QSFP module are set as "active"
> - * and "hot" trip points, "normal" trip point is derived from "active"
> - * by subtracting double hysteresis value.
> - */
> - if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
> - tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = crit_temp -
> - MLXSW_THERMAL_MODULE_TEMP_SHIFT;
> - else
> - tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = crit_temp;
> - tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temperature = crit_temp;
> - tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temperature = emerg_temp;
> -
> - return 0;
> -}
> -
> static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
> struct thermal_cooling_device *cdev)
> {
> @@ -358,10 +316,8 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
> struct mlxsw_thermal_module *tz = tzdev->devdata;
> struct mlxsw_thermal *thermal = tz->parent;
> int temp, crit_temp, emerg_temp;
> - struct device *dev;
> u16 sensor_index;
>
> - dev = thermal->bus_info->dev;
> sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module;
>
> /* Read module temperature and thresholds. */
> @@ -371,13 +327,6 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
> &crit_temp, &emerg_temp);
> *p_temp = temp;
>
> - if (!temp)
> - return 0;
> -
> - /* Update trip points. */
> - mlxsw_thermal_module_trips_update(dev, thermal->core, tz,
> - crit_temp, emerg_temp);
> -
> return 0;
> }
>
> @@ -527,10 +476,7 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
> struct mlxsw_thermal_area *area, u8 module)
> {
> struct mlxsw_thermal_module *module_tz;
> - int dummy_temp, crit_temp, emerg_temp;
> - u16 sensor_index;
>
> - sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module;
> module_tz = &area->tz_module_arr[module];
> /* Skip if parent is already set (case of port split). */
> if (module_tz->parent)
> @@ -538,19 +484,13 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
> module_tz->module = module;
> module_tz->slot_index = area->slot_index;
> module_tz->parent = thermal;
> - memcpy(module_tz->trips, default_thermal_trips,
> + BUILD_BUG_ON(ARRAY_SIZE(default_thermal_module_trips) !=
> + MLXSW_THERMAL_NUM_TRIPS);
> + memcpy(module_tz->trips, default_thermal_module_trips,
> sizeof(thermal->trips));
> memcpy(module_tz->cooling_states, default_cooling_states,
> sizeof(thermal->cooling_states));
> - /* Initialize all trip point. */
> - mlxsw_thermal_module_trips_reset(module_tz);
> - /* Read module temperature and thresholds. */
> - mlxsw_thermal_module_temp_and_thresholds_get(core, area->slot_index,
> - sensor_index, &dummy_temp,
> - &crit_temp, &emerg_temp);
> - /* Update trip point according to the module data. */
> - return mlxsw_thermal_module_trips_update(dev, core, module_tz,
> - crit_temp, emerg_temp);
> + return 0;
> }
>
> static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs
Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
Powered by blists - more mailing lists