[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d5076272-ce3c-4a09-92db-f096d78d0706@roeck-us.net>
Date: Tue, 31 Oct 2023 09:30:49 -0700
From: Guenter Roeck <linux@...ck-us.net>
To: David Ober <dober6023@...il.com>, linux-hwmon@...r.kernel.org
Cc: linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org,
jdelvare@...e.com, corbet@....net, dober@...ovo.com,
mpearson-lenovo@...ebb.ca
Subject: Re: [PATCH v2] hwmon:Add MEC172x Micro Chip driver for Lenovo
motherboards
On 10/31/23 08:49, David Ober wrote:
> This addition adds in the ability for the system to scan the
> MEC172x EC chip in Lenovo ThinkStation systems to get the
> current fan RPM speeds and the Maximum speed value for each
> fan also provides the current CPU and DIMM thermal status
>
> Signed-off-by: David Ober <dober6023@...il.com>
>
> v2 fixed mixcased naming
> v2 add mutex protection
> v2 removed references to ACPI as it is not used
> v2 added comment to explain why returning a -1 is needed
> ---
> drivers/hwmon/Kconfig | 10 +
> drivers/hwmon/Makefile | 1 +
> drivers/hwmon/lenovo-ec-sensors.c | 484 ++++++++++++++++++++++++++++++
> 3 files changed, 495 insertions(+)
> create mode 100644 drivers/hwmon/lenovo-ec-sensors.c
>
> diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
> index ec38c8892158..821741ec0d2f 100644
> --- a/drivers/hwmon/Kconfig
> +++ b/drivers/hwmon/Kconfig
> @@ -862,6 +862,16 @@ config SENSORS_LAN966X
> This driver can also be built as a module. If so, the module
> will be called lan966x-hwmon.
>
> +config SENSORS_LENOVO_EC
> + tristate "Microchip MEC172X Chip for Lenovo ThinkStation"
> + depends on I2C
> + help
> + If you say yes here you get support for LENOVO
> + EC Sensors on newer ThinkStation systems
> +
> + This driver can also be built as a module. If so, the module
> + will be called lenovo_ec_sensors.
> +
> config SENSORS_LINEAGE
> tristate "Lineage Compact Power Line Power Entry Module"
> depends on I2C
> diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
> index 4ac9452b5430..aa3c2dc390ec 100644
> --- a/drivers/hwmon/Makefile
> +++ b/drivers/hwmon/Makefile
> @@ -104,6 +104,7 @@ obj-$(CONFIG_SENSORS_JC42) += jc42.o
> obj-$(CONFIG_SENSORS_K8TEMP) += k8temp.o
> obj-$(CONFIG_SENSORS_K10TEMP) += k10temp.o
> obj-$(CONFIG_SENSORS_LAN966X) += lan966x-hwmon.o
> +obj-$(CONFIG_SENSORS_LENOVO_EC) += lenovo-ec-sensors.o
> obj-$(CONFIG_SENSORS_LINEAGE) += lineage-pem.o
> obj-$(CONFIG_SENSORS_LOCHNAGAR) += lochnagar-hwmon.o
> obj-$(CONFIG_SENSORS_LM63) += lm63.o
> diff --git a/drivers/hwmon/lenovo-ec-sensors.c b/drivers/hwmon/lenovo-ec-sensors.c
> new file mode 100644
> index 000000000000..e2b14b3aea08
> --- /dev/null
> +++ b/drivers/hwmon/lenovo-ec-sensors.c
> @@ -0,0 +1,484 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * HWMON driver for MEC172x chip that publishes some sensor values
> + * via the embedded controller registers specific to Lenovo Systems.
> + *
> + * Copyright (C) 2023 David Ober (Lenovo) <dober@...ovo.com>
> + *
> + * EC provides:
> + * - CPU temperature
> + * - DIMM temperature
> + * - Chassis zone temperatures
> + * - CPU fan RPM
> + * - DIMM fan RPM
> + * - Chassis fans RPM
> + */
> +
> +#include <linux/acpi.h>
> +#include <linux/delay.h>
> +#include <linux/hwmon.h>
> +#include <linux/ioport.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/units.h>
> +
> +#define MCHP_SING_IDX 0x0000
> +#define MCHP_EMI0_APPLICATION_ID 0x090C
> +#define MCHP_EMI0_EC_ADDRESS_LSB 0x0902
> +#define MCHP_EMI0_EC_ADDRESS_MSB 0x0903
> +#define MCHP_EMI0_EC_DATA_BYTE0 0x0904
> +#define MCHP_EMI0_EC_DATA_BYTE1 0x0905
> +#define MCHP_EMI0_EC_DATA_BYTE2 0x0906
> +#define MCHP_EMI0_EC_DATA_BYTE3 0x0907
> +
> +#define io_write8(a, b) outb_p(b, a)
> +#define io_read8(a) inb_p(a)
> +
> +static inline uint8_t
> +get_ec_reg(unsigned char page, unsigned char index)
> +{
> + u8 onebyte;
> + unsigned short m_index;
> + unsigned short phy_index = page * 256 + index;
> +
> + if (io_read8(MCHP_EMI0_APPLICATION_ID) != 0) /* EMI access locked */
> + return -1;
> +
Whatever the explanation is, returning -1 from a function declared as
returning uint8_t doesn't make sense. Not checking the return
code in the calling function makes even less sense.
> + io_write8(MCHP_EMI0_APPLICATION_ID, 0x01);
> +
> + m_index = phy_index & 0x7FFC;
> + io_write8(MCHP_EMI0_EC_ADDRESS_LSB, m_index);
> + io_write8(MCHP_EMI0_EC_ADDRESS_MSB, m_index >> 8);
> +
> + switch (phy_index & 0x0003) {
> + case 0:
> + onebyte = io_read8(MCHP_EMI0_EC_DATA_BYTE0);
> + break;
> + case 1:
> + onebyte = io_read8(MCHP_EMI0_EC_DATA_BYTE1);
> + break;
> + case 2:
> + onebyte = io_read8(MCHP_EMI0_EC_DATA_BYTE2);
> + break;
> + case 3:
> + onebyte = io_read8(MCHP_EMI0_EC_DATA_BYTE3);
> + break;
> + }
> +
> + io_write8(MCHP_EMI0_APPLICATION_ID, 0x01); /* write same data to clean */
> + return onebyte;
> +}
> +
> +static const char * const systems[] = {
> + "Tomcat",
> + "Hornet",
> + "Falcon",
> + "Manta_",
> +};
> +
> +static const char * const lenovo_px_ec_temp_label[] = {
> + "CPU1",
> + "CPU2",
> + "R_DIMM1",
> + "L_DIMM1",
> + "R_DIMM2",
> + "L_DIMM2",
> + "PCH",
> + "M2_R",
> + "M2_Z1R",
> + "M2_Z2R",
> + "PCI_Z1",
> + "PCI_Z2",
> + "PCI_Z3",
> + "PCI_Z4",
> + "AMB",
> +};
> +
> +static const char * const lenovo_gen_ec_temp_label[] = {
> + "CPU1",
> + "",
> + "R_DIMM",
> + "L_DIMM",
> + "",
> + "",
> + "PCH",
> + "M2_R",
> + "M2_Z1R",
> + "M2_Z2R",
> + "PCI_Z1",
> + "PCI_Z2",
> + "PCI_Z3",
> + "PCI_Z4",
> + "AMB",
> +};
> +
> +static const char * const px_ec_fan_label[] = {
> + "CPU1_Fan",
> + "CPU2_Fan",
> + "Front_Fan1-1",
> + "Front_Fan1-2",
> + "Front_Fan2",
> + "Front_Fan3",
> + "MEM_Fan1",
> + "MEM_Fan2",
> + "Rear_Fan1",
> + "Rear_Fan2",
> + "Flex_Bay_Fan1",
> + "Flex_Bay_Fan2",
> + "Flex_Bay_Fan2",
> + "PSU_HDD_Fan",
> + "PSU1_Fan",
> + "PSU2_Fan",
> +};
> +
> +static const char * const p7_ec_fan_label[] = {
> + "CPU1_Fan",
> + "",
> + "HP_CPU_Fan1",
> + "HP_CPU_Fan2",
> + "PCIE1_4_Fan",
> + "PCIE5_7_Fan",
> + "MEM_Fan1",
> + "MEM_Fan2",
> + "Rear_Fan1",
> + "",
> + "BCB_Fan",
> + "Flex_Bay_Fan",
> + "",
> + "",
> + "PSU_Fan",
> + "",
> +};
> +
> +static const char * const p5_ec_fan_label[] = {
> + "CPU_Fan",
> + "",
> + "",
> + "",
> + "",
> + "HDD_Fan",
> + "Duct_Fan1",
> + "MEM_Fan",
> + "Rear_Fan",
> + "",
> + "Front_Fan",
> + "Flex_Bay_Fan",
> + "",
> + "",
> + "PSU_Fan",
> + "",
> +};
> +
> +static const char * const p7_amd_ec_fan_label[] = {
> + "CPU1_Fan",
> + "CPU2_Fan",
> + "HP_CPU_Fan1",
> + "HP_CPU_Fan2",
> + "PCIE1_4_Fan",
> + "PCIE5_7_Fan",
> + "DIMM1_Fan1",
> + "DIMM1_Fan2",
> + "DIMM2_Fan1",
> + "DIMM2_Fan2",
> + "Rear_Fan",
> + "HDD_Bay_Fan",
> + "Flex_Bay_Fan",
> + "",
> + "PSU_Fan",
> + "",
> +};
> +
> +struct ec_sensors_data {
> + struct mutex mec_mutex; /* lock for sensors write */
> + u8 platform_id;
> + const char *const *fan_labels;
> + const char *const *temp_labels;
> +};
> +
> +static int
> +lenovo_ec_do_read_temp(struct ec_sensors_data *data, u32 attr, int channel, long *val)
> +{
> + u8 LSB;
> +
> + switch (attr) {
> + case hwmon_temp_input:
> + mutex_lock(&data->mec_mutex);
> + LSB = get_ec_reg(2, 0x81 + channel);
> + mutex_unlock(&data->mec_mutex);
> + if (LSB > 0x40) {
> + *val = (LSB - 0x40) * 1000;
> + } else {
> + *val = 0;
> + return -1;
No. Return valid Linux error codes. Also, error handling
should come first, and writing 0 into *val when returning
an error is useless.
Also, get_ec_reg() returns 0xff if "EMI access locked",
which is treated as valid here (and elsewhere). This doesn't
make sense to me.
> + }
> + return 0;
> + default:
> + break;
> + }
> + return -EOPNOTSUPP;
> +}
> +
> +static int
> +lenovo_ec_do_read_fan(struct ec_sensors_data *data, u32 attr, int channel, long *val)
> +{
> + u8 LSB, MSB;
> +
> + channel *= 2;
> + switch (attr) {
> + case hwmon_fan_input:
> + mutex_lock(&data->mec_mutex);
> + LSB = get_ec_reg(4, 0x60 + channel);
> + MSB = get_ec_reg(4, 0x61 + channel);
> + mutex_unlock(&data->mec_mutex);
> + if ((MSB << 8) + LSB != 0) {
This is confusing. Why not just "if (MSB || LSB)" ?
> + mutex_lock(&data->mec_mutex);
> + LSB = get_ec_reg(4, 0x20 + channel);
> + MSB = get_ec_reg(4, 0x21 + channel);
> + mutex_unlock(&data->mec_mutex);
> + *val = (MSB << 8) + LSB;
> + return 0;
> + }
> + return -1; /* Returning -1 here has the sensors tool mark the FAN speed as N/A */
-1 translates to -EPERM. No, this is unacceptable.
Please return valid Linux error codes. If you _really_ think
that this should return -EPERM, explain in detail (and use -EPERM).
FWIW, there is -ENODATA which is displayed in the "sensors"
command as N/A. Maybe that is what you want.
> + case hwmon_fan_max:
> + mutex_lock(&data->mec_mutex);
> + LSB = get_ec_reg(4, 0x60 + channel);
> + MSB = get_ec_reg(4, 0x61 + channel);
> + mutex_unlock(&data->mec_mutex);
> + if ((MSB << 8) + LSB != 0) {
> + mutex_lock(&data->mec_mutex);
> + LSB = get_ec_reg(4, 0x40 + channel);
> + MSB = get_ec_reg(4, 0x41 + channel);
> + mutex_unlock(&data->mec_mutex);
> + *val = (MSB << 8) + LSB;
> + } else {
> + *val = 0;
> + }
> + return 0;
> + case hwmon_fan_min:
> + case hwmon_fan_div:
> + case hwmon_fan_alarm:
> + break;
> + default:
> + break;
> + }
> + return -EOPNOTSUPP;
> +}
> +
> +static int get_platform(struct ec_sensors_data *data)
> +{
> + char system_type[6];
> + int ret = -1;
> + int idx;
> +
> + for (idx = 0 ; idx < 6 ; idx++)
> + mutex_lock(&data->mec_mutex);
> + system_type[idx] = get_ec_reg(0xC, (0x10 + idx));
Please avoid unnecessary ( ).
> + mutex_unlock(&data->mec_mutex);
> +
> + for (idx = 0 ; idx < 4 ; idx++) {
> + if (strcmp(systems[idx], system_type) == 0) {
Use "!strcmp()".
The length of system_type[] is 6, all fields are written,
and the data is not initialized. There is no guarantee that the
value at string[6] is '\0', and either case strcmp() will read
past the end of the array.
> + ret = idx;
> + break;
> + }
> + }
> + return ret;
> +}
> +
> +static int
> +lenovo_ec_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type,
> + u32 attr, int channel, const char **str)
> +{
> + struct ec_sensors_data *state = dev_get_drvdata(dev);
> +
> + switch (type) {
> + case hwmon_temp:
> + *str = state->temp_labels[channel];
> + break;
> +
> + case hwmon_fan:
> + *str = state->fan_labels[channel];
Some of those strings are empty. Does that mean the fans don't exist ?
If so, don't instantiate them. If it is simply unknown what the fans
do, don't instantiate the attribute either. Returning an empty
string as "label" is not acceptable.
> + break;
> + default:
> + return -EOPNOTSUPP; /* unreachable */
Kind of inconsistent comment. Please drop.
> + }
> + return 0;
> +}
> +
> +static int
> +lenovo_ec_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
> + u32 attr, int channel, long *val)
> +{
> + struct ec_sensors_data *data = dev_get_drvdata(dev);
> +
> + switch (type) {
> + case hwmon_temp:
> + return lenovo_ec_do_read_temp(data, attr, channel, val);
> + case hwmon_fan:
> + return lenovo_ec_do_read_fan(data, attr, channel, val);
> + default:
> + return -EOPNOTSUPP;
> + }
> +
> + return 0;
> +}
> +
> +static umode_t
> +lenovo_ec_hwmon_is_visible(const void *data, enum hwmon_sensor_types type,
> + u32 attr, int channel)
> +{
> + switch (type) {
> + case hwmon_temp:
> + if (attr == hwmon_temp_input || attr == hwmon_temp_label)
> + return 0444;
> + break;
> + case hwmon_fan:
> + if (attr == hwmon_fan_input || attr == hwmon_fan_max || attr == hwmon_fan_label)
> + return 0444;
> + break;
> + default:
> + return 0;
> + }
> + return 0;
> +}
> +
> +static const struct hwmon_channel_info *lenovo_ec_hwmon_info[] = {
> + HWMON_CHANNEL_INFO(temp,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL),
> + HWMON_CHANNEL_INFO(fan,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX,
> + HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_MAX),
> + NULL
> +};
> +
> +static const struct hwmon_ops lenovo_ec_hwmon_ops = {
> + .is_visible = lenovo_ec_hwmon_is_visible,
> + .read = lenovo_ec_hwmon_read,
> + .read_string = lenovo_ec_hwmon_read_string,
> +};
> +
> +static struct hwmon_chip_info lenovo_ec_chip_info = {
> + .ops = &lenovo_ec_hwmon_ops,
> + .info = lenovo_ec_hwmon_info,
> +};
> +
> +static int lenovo_ec_probe(struct platform_device *pdev)
> +{
> + struct device *hwdev;
> + struct ec_sensors_data *ec_data;
> + const struct hwmon_chip_info *chip_info;
> + struct device *dev = &pdev->dev;
> +
> + ec_data = devm_kzalloc(dev, sizeof(struct ec_sensors_data), GFP_KERNEL);
> + if (!ec_data)
> + return -ENOMEM;
> +
> + dev_set_drvdata(dev, ec_data);
> +
> + chip_info = &lenovo_ec_chip_info;
> +
> + mutex_lock(&ec_data->mec_mutex);
mutex_init() missing.
> + if (io_read8(MCHP_EMI0_APPLICATION_ID) != 0) { /* check EMI Application BIT */
> + io_write8(0x90C, io_read8(0x90C)); /* set EMI Application BIT to 0 */
> + }
> + io_write8(MCHP_EMI0_EC_ADDRESS_LSB, MCHP_SING_IDX);
> + io_write8(MCHP_EMI0_EC_ADDRESS_MSB, MCHP_SING_IDX >> 8);
This is really unacceptable. It pretty much immediately writes
into i/o registers. I am quite sure this would crash or have
undesirable impact on lots of non-Lenovo systems.
Also, the driver should reserve the register i/o range it uses, not just
use it blindly.
> + mutex_unlock(&ec_data->mec_mutex);
> +
> + if ((io_read8(MCHP_EMI0_EC_DATA_BYTE0) == 'M') &&
> + (io_read8(MCHP_EMI0_EC_DATA_BYTE1) == 'C') &&
> + (io_read8(MCHP_EMI0_EC_DATA_BYTE2) == 'H') &&
> + (io_read8(MCHP_EMI0_EC_DATA_BYTE3) == 'P')) {
> + ec_data->platform_id = get_platform(ec_data);
> + switch (ec_data->platform_id) {
> + case 0:
> + ec_data->fan_labels = px_ec_fan_label;
> + ec_data->temp_labels = lenovo_px_ec_temp_label;
> + break;
> + case 1:
> + ec_data->fan_labels = p7_ec_fan_label;
> + ec_data->temp_labels = lenovo_gen_ec_temp_label;
> + break;
> + case 2:
> + ec_data->fan_labels = p5_ec_fan_label;
> + ec_data->temp_labels = lenovo_gen_ec_temp_label;
> + break;
> + case 3:
> + ec_data->fan_labels = p7_amd_ec_fan_label;
> + ec_data->temp_labels = lenovo_gen_ec_temp_label;
There is only a single set of temperature labels. ec_data->temp_labels
is therefore unnecessary and lenovo_gen_ec_temp_label can be used directly.
> + break;
> + default:
> + dev_err(dev, "Unknown ThinkStation Model");
Maybe better "Unsupported"
> + return -EINVAL;
-ENODEV;
> + }
> +
> + hwdev = devm_hwmon_device_register_with_info(dev, "lenovo_ec",
> + ec_data,
> + chip_info, NULL);
> +
> + return PTR_ERR_OR_ZERO(hwdev);
> + }
> + return -ENODEV;
Error handling should come first.
> +}
> +
> +static struct platform_driver lenovo_ec_sensors_platform_driver = {
> + .driver = {
> + .name = "lenovo-ec-sensors",
> + },
> + .probe = lenovo_ec_probe,
> +};
> +
> +static struct platform_device *lenovo_ec_sensors_platform_device;
> +
> +static int __init lenovo_ec_init(void)
> +{
> + lenovo_ec_sensors_platform_device =
> + platform_create_bundle(&lenovo_ec_sensors_platform_driver,
> + lenovo_ec_probe, NULL, 0, NULL, 0);
> +
> + if (IS_ERR(lenovo_ec_sensors_platform_device))
> + return PTR_ERR(lenovo_ec_sensors_platform_device);
> +
> + return 0;
> +}
Isn't there a means to determine if this is a supported Lenovo system
prior to calling the probe function ? DMI data, maybe ?
> +
> +static void __exit lenovo_ec_exit(void)
> +{
> + platform_device_unregister(lenovo_ec_sensors_platform_device);
> + platform_driver_unregister(&lenovo_ec_sensors_platform_driver);
> +}
> +
> +module_init(lenovo_ec_init);
> +module_exit(lenovo_ec_exit);
> +
> +MODULE_AUTHOR("David Ober <dober@...ovo.com>");
> +MODULE_DESCRIPTION("HWMON driver for MEC172x EC sensors on LENOVO motherboards");
> +MODULE_LICENSE("GPL");
Powered by blists - more mailing lists