[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20200123223239.GA2070@roeck-us.net>
Date: Thu, 23 Jan 2020 14:32:39 -0800
From: Guenter Roeck <linux@...ck-us.net>
To: linux-hwmon@...r.kernel.org
Cc: linux-kernel@...r.kernel.org, Clemens Ladisch <clemens@...isch.de>,
Jean Delvare <jdelvare@...e.com>,
Ondrej Čerman <ocerman@...1.eu>,
Michael Larabel <michael@...ronix.com>
Subject: Re: [PATCH v3] hwmon: (k10temp) Display up to eight sets of CCD
temperatures
Sorry for the noise - please don't bother with this version.
I'll send v4 in a minute, with improved support for older EPYC CPUs.
Guenter
On Thu, Jan 23, 2020 at 02:04:22PM -0800, Guenter Roeck wrote:
> In HWiNFO, we see support for Tccd1, Tccd3, Tccd5, and Tccd7 temperature
> sensors on Zen2 based Threadripper CPUs. Checking register maps on
> Threadripper 3970X confirms SMN register addresses and values for those
> sensors.
>
> Register values observed in an idle system:
>
> 0x059950: 00000000 00000abc 00000000 00000ad8
> 0x059960: 00000000 00000ade 00000000 00000ae4
>
> Under load:
>
> 0x059950: 00000000 00000c02 00000000 00000c14
> 0x059960: 00000000 00000c30 00000000 00000c22
>
> More analysis shows that EPYC CPUs support up to 8 CCD temperature
> sensors.
>
> On top of that, in thm_10_0_sh_mask.h in the Linux kernel, we find
> definitions for THM_DIE{1-3}_TEMP__VALID_MASK, set to 0x00000800, as well
> as matching SMN addresses. This lets us conclude that bit 11 of the
> respective registers is a valid bit. With this assumption, the temperature
> offset is now 49 degrees C. This conveniently matches the documented
> temperature offset for Tdie, again suggesting that above registers indeed
> report temperatures sensor values. Assume that bit 11 is indeed a valid
> bit, and add support for the additional sensors.
>
> With this patch applied, output from 3970X (idle) looks as follows:
>
> k10temp-pci-00c3
> Adapter: PCI adapter
> Tdie: +55.9°C
> Tctl: +55.9°C
> Tccd1: +39.8°C
> Tccd3: +43.8°C
> Tccd5: +43.8°C
> Tccd7: +44.8°C
>
> Tested-by: Michael Larabel <michael@...ronix.com>
> Signed-off-by: Guenter Roeck <linux@...ck-us.net>
> ---
> v3: Increased number of supported CCD sensors to 8.
> Dropped note that functionality on high-end CPUs is not known,
> since that is no longer correct.
>
> v2: Added additional sensors to k10temp_info[].
>
> drivers/hwmon/k10temp.c | 70 ++++++++++++++++++-----------------------
> 1 file changed, 31 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
> index 5e3f43594084..1634cb6394f0 100644
> --- a/drivers/hwmon/k10temp.c
> +++ b/drivers/hwmon/k10temp.c
> @@ -7,7 +7,7 @@
> * Copyright (c) 2020 Guenter Roeck <linux@...ck-us.net>
> *
> * Implementation notes:
> - * - CCD1 and CCD2 register address information as well as the calculation to
> + * - CCD register address information as well as the calculation to
> * convert raw register values is from https://github.com/ocerman/zenpower.
> * The information is not confirmed from chip datasheets, but experiments
> * suggest that it provides reasonable temperature values.
> @@ -18,11 +18,6 @@
> * normalized to report 1A/LSB for core current and and 0.25A/LSB for SoC
> * current. Reported values can be adjusted using the sensors configuration
> * file.
> - * - It is unknown if the mechanism to read CCD1/CCD2 temperature as well as
> - * current and voltage information works on higher-end Ryzen CPUs.
> - * Information reported by Windows tools suggests that additional sensors
> - * (both temperature and voltage/current) are supported, but their register
> - * location is currently unknown.
> */
>
> #include <linux/bitops.h>
> @@ -80,8 +75,10 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
>
> /* F17h M01h Access througn SMN */
> #define F17H_M01H_REPORTED_TEMP_CTRL_OFFSET 0x00059800
> -#define F17H_M70H_CCD1_TEMP 0x00059954
> -#define F17H_M70H_CCD2_TEMP 0x00059958
> +
> +#define F17H_M70H_CCD_TEMP(x) (0x00059954 + ((x) * 4))
> +#define F17H_M70H_CCD_TEMP_VALID BIT(11)
> +#define F17H_M70H_CCD_TEMP_MASK GENMASK(10, 0)
>
> #define F17H_M01H_SVI 0x0005A000
> #define F17H_M01H_SVI_TEL_PLANE0 (F17H_M01H_SVI + 0xc)
> @@ -100,8 +97,7 @@ struct k10temp_data {
> int temp_offset;
> u32 temp_adjust_mask;
> bool show_tdie;
> - bool show_tccd1;
> - bool show_tccd2;
> + u32 show_tccd;
> u32 svi_addr[2];
> bool show_current;
> int cfactor[2];
> @@ -188,6 +184,12 @@ const char *k10temp_temp_label[] = {
> "Tctl",
> "Tccd1",
> "Tccd2",
> + "Tccd3",
> + "Tccd4",
> + "Tccd5",
> + "Tccd6",
> + "Tccd7",
> + "Tccd8",
> };
>
> const char *k10temp_in_label[] = {
> @@ -277,15 +279,10 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
> if (*val < 0)
> *val = 0;
> break;
> - case 2: /* Tccd1 */
> - amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
> - F17H_M70H_CCD1_TEMP, ®val);
> - *val = (regval & 0xfff) * 125 - 305000;
> - break;
> - case 3: /* Tccd2 */
> + case 2 ... 9: /* Tccd{1-8} */
> amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
> - F17H_M70H_CCD2_TEMP, ®val);
> - *val = (regval & 0xfff) * 125 - 305000;
> + F17H_M70H_CCD_TEMP(channel - 2), ®val);
> + *val = (regval & F17H_M70H_CCD_TEMP_MASK) * 125 - 49000;
> break;
> default:
> return -EOPNOTSUPP;
> @@ -343,12 +340,8 @@ static umode_t k10temp_is_visible(const void *_data,
> if (!data->show_tdie)
> return 0;
> break;
> - case 2: /* Tccd1 */
> - if (!data->show_tccd1)
> - return 0;
> - break;
> - case 3: /* Tccd2 */
> - if (!data->show_tccd2)
> + case 2 ... 9: /* Tccd{1-8} */
> + if (!(data->show_tccd & BIT(channel - 2)))
> return 0;
> break;
> default:
> @@ -382,12 +375,8 @@ static umode_t k10temp_is_visible(const void *_data,
> case 0: /* Tdie */
> case 1: /* Tctl */
> break;
> - case 2: /* Tccd1 */
> - if (!data->show_tccd1)
> - return 0;
> - break;
> - case 3: /* Tccd2 */
> - if (!data->show_tccd2)
> + case 2 ... 9: /* Tccd{1-8} */
> + if (!(data->show_tccd & BIT(channel - 2)))
> return 0;
> break;
> default:
> @@ -520,6 +509,12 @@ static const struct hwmon_channel_info *k10temp_info[] = {
> HWMON_T_LABEL,
> HWMON_T_INPUT | HWMON_T_LABEL,
> HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> + HWMON_T_INPUT | HWMON_T_LABEL,
> HWMON_T_INPUT | HWMON_T_LABEL),
> HWMON_CHANNEL_INFO(in,
> HWMON_I_INPUT | HWMON_I_LABEL,
> @@ -595,15 +590,12 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> data->cfactor[1] = CFACTOR_ISOC;
> data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE1;
> data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE0;
> - amd_smn_read(amd_pci_dev_to_node_id(pdev),
> - F17H_M70H_CCD1_TEMP, ®val);
> - if (regval & 0xfff)
> - data->show_tccd1 = true;
> -
> - amd_smn_read(amd_pci_dev_to_node_id(pdev),
> - F17H_M70H_CCD2_TEMP, ®val);
> - if (regval & 0xfff)
> - data->show_tccd2 = true;
> + for (i = 0; i < 8; i++) {
> + amd_smn_read(amd_pci_dev_to_node_id(pdev),
> + F17H_M70H_CCD_TEMP(i), ®val);
> + if (regval & F17H_M70H_CCD_TEMP_VALID)
> + data->show_tccd |= BIT(i);
> + }
> break;
> }
> } else {
> --
> 2.17.1
>
Powered by blists - more mailing lists