lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <D6D887BA8C9DFF48B5233887EF04654105C1173BCD@bgsmsx502.gar.corp.intel.com>
Date:	Sat, 18 Dec 2010 19:39:59 +0530
From:	"R, Durgadoss" <durgadoss.r@...el.com>
To:	"Yu, Fenghua" <fenghua.yu@...el.com>,
	"khali@...ux-fr.org" <khali@...ux-fr.org>,
	Guenter Roeck <guenter.roeck@...csson.com>,
	"Brown, Len" <len.brown@...el.com>,
	"mingo@...hat.com" <mingo@...hat.com>,
	"hpa@...or.com" <hpa@...or.com>
CC:	"lm-sensors@...sensors.org" <lm-sensors@...sensors.org>,
	"x86@...nel.org" <x86@...nel.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Patch[1/2] Adding_threshold_support_to_coretemp

Hi,

I am submitting a patch to enable core thermal threshold
Support to coretemp.c. There are two core thermal thresholds
available through sysfs interfaces temp1_max and temp1_max_hyst.
The temp1_max_alarm is set when temperature reaches or crosses
above temp1_max or drops below temp1_max_hyst.

This patch is generated against stable Linux-2.6 kernel.

Kindly review and merge.
----------------------------------------------------------------
From: Durgadoss R <durgadoss.r@...el.com>

Date: Sat, 18 Dec 2010 06:45:41 +0530
Subject: PATCH[1/2] Adding_Threshold_Support_to_Coretemp

This patch adds core thermal threshold support to coretemp.
These thresholds can be configured via the sysfs interfaces temp1_max
and temp1_max_hyst. An interrupt is generated when CPU temperature reaches
or crosses above temp1_max and drops below temp1_max_hyst.

Signed-off-by: Durgadoss R <durgadoss.r@...el.com>

---
 Documentation/hwmon/coretemp             |    8 +
 arch/x86/include/asm/mce.h               |    3 +
 arch/x86/include/asm/msr-index.h         |   12 ++
 arch/x86/kernel/cpu/mcheck/therm_throt.c |   40 ++++++
 drivers/hwmon/coretemp.c                 |  214 +++++++++++++++++++++++++++---
 5 files changed, 257 insertions(+), 20 deletions(-)

diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp
index 25568f8..6389292 100644
--- a/Documentation/hwmon/coretemp
+++ b/Documentation/hwmon/coretemp
@@ -29,6 +29,14 @@ the Out-Of-Spec bit. Following table summarizes the exported sysfs files:

 temp1_input     - Core temperature (in millidegrees Celsius).
 temp1_max       - All cooling devices should be turned on (on Core2).
+                  Initialized with IA32_TEMPERATURE_TARGET if supported,
+                  otherwise initialized with (tjmax - 20). When the CPU
+                  temperature reaches this temperature, an interrupt is
+                  generated and temp1_max_alarm is set.
+temp1_max_hyst  - If the CPU temperature falls below than temperature,
+                  an interrupt is generated and temp1_max_alarm is reset.
+temp1_max_alarm - Set if the temperature reaches or exceeds temp1_max.
+                  Reset if the temperature drops to or below temp1_max_hyst.
 temp1_crit      - Maximum junction temperature (in millidegrees Celsius).
 temp1_crit_alarm - Set when Out-of-spec bit is set, never clears.
                   Correct CPU operation is no longer guaranteed.
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c62c13c..eb16e94 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -223,6 +223,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c);

 void mce_log_therm_throt_event(__u64 status);

+/* Interrupt Handler for core thermal thresholds */
+extern int (*platform_thermal_notify)(__u64 msr_val);
+
 #ifdef CONFIG_X86_THERMAL_VECTOR
 extern void mcheck_intel_therm_init(void);
 #else
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3ea3dc4..a9de090 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -253,6 +253,18 @@
 #define PACKAGE_THERM_INT_LOW_ENABLE           (1 << 1)
 #define PACKAGE_THERM_INT_PLN_ENABLE           (1 << 24)

+/* Thermal Thresholds Support */
+#define THERM_INT_THRESHOLD0_ENABLE    (1 << 15)
+#define THERM_SHIFT_THRESHOLD0        8
+#define THERM_MASK_THRESHOLD0          (0x7f << THERM_SHIFT_THRESHOLD0)
+#define THERM_INT_THRESHOLD1_ENABLE    (1 << 23)
+#define THERM_SHIFT_THRESHOLD1        16
+#define THERM_MASK_THRESHOLD1          (0x7f << THERM_SHIFT_THRESHOLD1)
+#define THERM_STATUS_THRESHOLD0        (1 << 6)
+#define THERM_LOG_THRESHOLD0           (1 << 7)
+#define THERM_STATUS_THRESHOLD1        (1 << 8)
+#define THERM_LOG_THRESHOLD1           (1 << 9)
+
 /* MISC_ENABLE bits: architectural */
 #define MSR_IA32_MISC_ENABLE_FAST_STRING       (1ULL << 0)
 #define MSR_IA32_MISC_ENABLE_TCC               (1ULL << 1)
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 4b68326..e12246f 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -53,8 +53,13 @@ struct thermal_state {
        struct _thermal_state core_power_limit;
        struct _thermal_state package_throttle;
        struct _thermal_state package_power_limit;
+       struct _thermal_state core_thresh0;
+       struct _thermal_state core_thresh1;
 };

+/* Callback to handle core threshold interrupts */
+int (*platform_thermal_notify)(__u64 msr_val);
+
 static DEFINE_PER_CPU(struct thermal_state, thermal_state);

 static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level)
        return 0;
 }

+static int thresh_event_valid(int event)
+{
+       struct _thermal_state *state;
+       unsigned int this_cpu = smp_processor_id();
+       struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
+       u64 now = get_jiffies_64();
+
+       state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
+
+       if (time_before64(now, state->next_check))
+               return 0;
+
+       state->next_check = now + CHECK_INTERVAL;
+       return 1;
+}
+
 #ifdef CONFIG_SYSFS
 /* Add/Remove thermal_throttle interface for CPU device: */
 static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
@@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device);
 #define PACKAGE_THROTTLED      ((__u64)2 << 62)
 #define PACKAGE_POWER_LIMIT    ((__u64)3 << 62)

+static void notify_thresholds(__u64 msr_val)
+{
+       /* check whether the interrupt handler is defined;
+        * otherwise simply return
+        */
+       if (!platform_thermal_notify)
+               return;
+
+       /* lower threshold reached */
+       if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
+               platform_thermal_notify(msr_val);
+       /* higher threshold reached */
+       if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
+               platform_thermal_notify(msr_val);
+}
+
 /* Thermal transition interrupt handler */
 static void intel_thermal_interrupt(void)
 {
@@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void)

        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

+       /* Check for violation of core thermal thresholds*/
+       notify_thresholds(msr_val);
+
        if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
                                THERMAL_THROTTLING_EVENT,
                                CORE_LEVEL) != 0)
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 42de98d..1bf8acb 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -36,11 +36,12 @@
 #include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/smp.h>
+#include <asm/mce.h>

 #define DRVNAME        "coretemp"

-typedef enum { SHOW_TEMP, SHOW_TJMAX, SHOW_TTARGET, SHOW_LABEL,
-               SHOW_NAME } SHOW;
+enum attributes { SHOW_TEMP, SHOW_TJMAX, CORE_TTARGET, CORE_TMIN, SHOW_LABEL,
+               SHOW_NAME, SHOW_CRIT_ALARM, SHOW_MAX_ALARM } attrs;

 /*
  * Functions declaration
@@ -59,9 +60,14 @@ struct coretemp_data {
        int temp;
        int tjmax;
        int ttarget;
-       u8 alarm;
+       int tmin;
+       u8 max_alarm;
+       u8 crit_alarm;
 };

+static void update_alarm(struct coretemp_data *data);
+static int set_core_threshold(struct coretemp_data *data, int temp, int thres);
+
 /*
  * Sysfs stuff
  */
@@ -83,9 +89,15 @@ static ssize_t show_name(struct device *dev, struct device_attribute
 static ssize_t show_alarm(struct device *dev, struct device_attribute
                          *devattr, char *buf)
 {
-       struct coretemp_data *data = coretemp_update_device(dev);
-       /* read the Out-of-spec log, never clear */
-       return sprintf(buf, "%d\n", data->alarm);
+       struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+       struct coretemp_data *data = dev_get_drvdata(dev);
+
+       update_alarm(data);
+       if (attr->index == SHOW_CRIT_ALARM)
+               /* read the Out-of-spec log, never clear */
+               return sprintf(buf, "%d\n", data->crit_alarm);
+
+       return sprintf(buf, "%d\n", data->max_alarm);
 }

 static ssize_t show_temp(struct device *dev,
@@ -93,33 +105,67 @@ static ssize_t show_temp(struct device *dev,
 {
        struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
        struct coretemp_data *data = coretemp_update_device(dev);
-       int err;
+       int err = -EINVAL;

-       if (attr->index == SHOW_TEMP)
+       switch (attr->index) {
+       case SHOW_TEMP:
                err = data->valid ? sprintf(buf, "%d\n", data->temp) : -EAGAIN;
-       else if (attr->index == SHOW_TJMAX)
+               break;
+       case SHOW_TJMAX:
                err = sprintf(buf, "%d\n", data->tjmax);
-       else
+               break;
+       case CORE_TTARGET:
                err = sprintf(buf, "%d\n", data->ttarget);
+               break;
+       case CORE_TMIN:
+               err = sprintf(buf, "%d\n", data->tmin);
+               break;
+       }
+
        return err;
 }

+
+static ssize_t store_temp(struct device *dev,
+               struct device_attribute *devattr, const char *buf, size_t count)
+{
+       struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+       struct coretemp_data *data = dev_get_drvdata(dev);
+       unsigned long val;
+       int err;
+
+       if (strict_strtoul(buf, 10, &val))
+               return -EINVAL;
+
+       err = set_core_threshold(data, val, attr->index);
+
+       return err ? err : count;
+}
+
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL,
                          SHOW_TEMP);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp, NULL,
                          SHOW_TJMAX);
-static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO, show_temp, NULL,
-                         SHOW_TTARGET);
-static DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL);
+static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, show_temp,
+                                               store_temp, CORE_TTARGET);
+static SENSOR_DEVICE_ATTR(temp1_max_hyst, S_IWUSR | S_IRUGO, show_temp,
+                                               store_temp, CORE_TMIN);
+static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL,
+                                                       SHOW_CRIT_ALARM);
+static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, show_alarm, NULL,
+                                                       SHOW_MAX_ALARM);
 static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, show_name, NULL, SHOW_LABEL);
 static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, SHOW_NAME);

 static struct attribute *coretemp_attributes[] = {
        &sensor_dev_attr_name.dev_attr.attr,
        &sensor_dev_attr_temp1_label.dev_attr.attr,
-       &dev_attr_temp1_crit_alarm.attr,
+       &sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
        &sensor_dev_attr_temp1_input.dev_attr.attr,
        &sensor_dev_attr_temp1_crit.dev_attr.attr,
+       &sensor_dev_attr_temp1_max.dev_attr.attr,
+       &sensor_dev_attr_temp1_max_hyst.dev_attr.attr,
+       &sensor_dev_attr_temp1_max_alarm.dev_attr.attr,
        NULL
 };

@@ -127,6 +173,26 @@ static const struct attribute_group coretemp_group = {
        .attrs = coretemp_attributes,
 };

+static void update_alarm(struct coretemp_data *data)
+{
+       u32 eax, edx;
+
+       rdmsr_on_cpu(data->id, MSR_IA32_THERM_STATUS, &eax, &edx);
+
+       /* Update the critical temperature alarm */
+       data->crit_alarm = (eax >> 5) & 1;
+
+       /* Temperature reached threshold1 */
+       if (eax & THERM_LOG_THRESHOLD1)
+               data->max_alarm = 1;
+       /* Temperature reached threshold0 */
+       else if (eax & THERM_LOG_THRESHOLD0)
+               data->max_alarm = 0;
+       /* If none of these cases, don't update max_alarm */
+
+       return;
+}
+
 static struct coretemp_data *coretemp_update_device(struct device *dev)
 {
        struct coretemp_data *data = dev_get_drvdata(dev);
@@ -138,7 +204,6 @@ static struct coretemp_data *coretemp_update_device(struct device *dev)

                data->valid = 0;
                rdmsr_on_cpu(data->id, MSR_IA32_THERM_STATUS, &eax, &edx);
-               data->alarm = (eax >> 5) & 1;
                /* update only if data has been valid */
                if (eax & 0x80000000) {
                        data->temp = data->tjmax - (((eax >> 16)
@@ -298,6 +363,106 @@ static void __devinit get_ucode_rev_on_cpu(void *edx)
        rdmsr(MSR_IA32_UCODE_REV, eax, *(u32 *)edx);
 }

+/* Platform thermal Interrupt Handler */
+static int coretemp_interrupt(__u64 msr_val)
+{
+
+       if (msr_val & THERM_LOG_THRESHOLD0) {
+               if (!(msr_val & THERM_STATUS_THRESHOLD0))
+                       pr_info("%s:Lower Threshold Reached\n", __func__);
+               /* Reset the Threshold0 interrupt */
+               wrmsrl(MSR_IA32_THERM_STATUS, msr_val & ~THERM_LOG_THRESHOLD0);
+       }
+
+       if (msr_val & THERM_LOG_THRESHOLD1) {
+               if (msr_val & THERM_STATUS_THRESHOLD1)
+                       pr_info("%s:Upper Threshold Reached\n", __func__);
+               /* Reset the Threshold1 interrupt */
+               wrmsrl(MSR_IA32_THERM_STATUS, msr_val & ~THERM_LOG_THRESHOLD1);
+       }
+
+       return 0;
+}
+
+static void configure_apic(void *info)
+{
+       u32 l;
+       int *flag = (int *)info;
+
+       l = apic_read(APIC_LVTTHMR);
+
+       if (*flag)      /* Non-Zero flag Masks the APIC */
+               apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
+       else            /* Zero flag UnMasks the APIC */
+               apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+}
+
+static int set_core_threshold(struct coretemp_data *data, int temp, int thres)
+{
+       u32 eax, edx;
+       int diff;
+       int flag = 1;
+
+       if (temp > data->tjmax)
+               return -EINVAL;
+
+       mutex_lock(&data->update_lock);
+
+       diff = (data->tjmax - temp)/1000;
+
+       /* Mask the APIC */
+       smp_call_function_single(data->id, &configure_apic, &flag, 1);
+
+       rdmsr_on_cpu(data->id, MSR_IA32_THERM_INTERRUPT, &eax, &edx);
+
+       if (thres == CORE_TMIN) {
+               eax = (eax & ~THERM_MASK_THRESHOLD0) |
+                                       (diff << THERM_SHIFT_THRESHOLD0);
+               data->tmin = temp;
+       } else {
+               eax = (eax & ~THERM_MASK_THRESHOLD1) |
+                                       (diff << THERM_SHIFT_THRESHOLD1);
+               data->ttarget = temp;
+       }
+
+       wrmsr_on_cpu(data->id, MSR_IA32_THERM_INTERRUPT, eax, edx);
+
+       /* Unmask the APIC */
+       flag = 0;
+       smp_call_function_single(data->id, &configure_apic, &flag, 1);
+
+       mutex_unlock(&data->update_lock);
+       return 0;
+}
+
+static int __devinit config_thresh_intrpt(struct coretemp_data *data,
+                                                               int enable)
+{
+       u32 eax, edx;
+       int flag = 1; /* Non-Zero Flag masks the apic */
+
+       smp_call_function_single(data->id, &configure_apic, &flag, 1);
+
+       rdmsr_on_cpu(data->id, MSR_IA32_THERM_INTERRUPT, &eax, &edx);
+
+       if (enable) {
+               eax |= (THERM_INT_THRESHOLD0_ENABLE |
+                                               THERM_INT_THRESHOLD1_ENABLE);
+               platform_thermal_notify = coretemp_interrupt;
+       } else {
+               eax &= (~(THERM_INT_THRESHOLD0_ENABLE |
+                                               THERM_INT_THRESHOLD1_ENABLE));
+               platform_thermal_notify = NULL;
+       }
+
+       wrmsr_on_cpu(data->id, MSR_IA32_THERM_INTERRUPT, eax, edx);
+
+       flag = 0; /*Flag should be zero to unmask the apic */
+       smp_call_function_single(data->id, &configure_apic, &flag, 1);
+
+       return 0;
+}
+
 static int __devinit coretemp_probe(struct platform_device *pdev)
 {
        struct coretemp_data *data;
@@ -351,6 +516,10 @@ static int __devinit coretemp_probe(struct platform_device *pdev)
        }

        data->tjmax = get_tjmax(c, data->id, &pdev->dev);
+       /* Initialize ttarget value. If IA32_TEMPERATURE_TARGET is
+        * supported, this value will be over written below
+        */
+       data->ttarget = data->tjmax - 20000;
        platform_set_drvdata(pdev, data);

        /*
@@ -368,13 +537,18 @@ static int __devinit coretemp_probe(struct platform_device *pdev)
                } else {
                        data->ttarget = data->tjmax -
                                        (((eax >> 8) & 0xff) * 1000);
-                       err = device_create_file(&pdev->dev,
-                                       &sensor_dev_attr_temp1_max.dev_attr);
-                       if (err)
-                               goto exit_free;
                }
        }

+       /* Enable threshold interrupt support */
+       config_thresh_intrpt(data, 1);
+
+       /* Set Initial Core thresholds.
+        * The lower and upper threshold values here are assumed
+        */
+       set_core_threshold(data, 0, CORE_TMIN);
+       set_core_threshold(data, data->ttarget, CORE_TTARGET);
+
        if ((err = sysfs_create_group(&pdev->dev.kobj, &coretemp_group)))
                goto exit_dev;

@@ -404,7 +578,7 @@ static int __devexit coretemp_remove(struct platform_device *pdev)

        hwmon_device_unregister(data->hwmon_dev);
        sysfs_remove_group(&pdev->dev.kobj, &coretemp_group);
-       device_remove_file(&pdev->dev, &sensor_dev_attr_temp1_max.dev_attr);
+       config_thresh_intrpt(data, 0);
        platform_set_drvdata(pdev, NULL);
        kfree(data);
        return 0;
--
1.6.5.2


Download attachment "0001-Adding_Threshold_Support_to_Coretemp.patch" of type "application/octet-stream" (15029 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ