diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/Makefile /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/Makefile
--- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/Makefile	2007-12-11 14:30:53.533297000 -0800
+++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/Makefile	2007-12-11 11:56:20.549185000 -0800
@@ -1,4 +1,4 @@
-obj-y = mce_$(BITS).o therm_throt.o
+obj-y = mce_$(BITS).o therm_throt.o mce_thermal.o
 obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
 obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
 obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
--- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c	2007-12-11 14:30:53.559298000 -0800
+++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_amd_64.c	2007-12-28 12:35:16.151106000 -0800
@@ -20,6 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/kobject.h>
 #include <linux/notifier.h>
+#include <linux/pci.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/sysdev.h>
@@ -29,6 +30,8 @@
 #include <asm/msr.h>
 #include <asm/percpu.h>
 #include <asm/idle.h>
+#include <asm/k8.h>
+#include <asm/therm_throt.h>
 
 #define PFX "mce_threshold: "
 #define VERSION "version 1.1.1"
@@ -46,6 +49,16 @@
 #define MASK_ERR_COUNT_HI 0x00000FFF
 #define MASK_BLKPTR_LO 0xFF000000
 #define MCG_XBLK_ADDR 0xC0000400
+#define THERM_CTL_F3X64 0x64
+#define PSL_APIC_LO_EN 0x80
+#define PSL_APIC_HI_EN 0x40
+#define HTC_ACTIVE 0x10
+#define HTC_EN 1
+#define NB_PCI_DEV_BASE 0x18
+
+static int smp_thermal_interrupt_init(void);
+static int thermal_apic_init_allowed;
+static void thermal_apic_init(void *unused);
 
 struct threshold_block {
 	unsigned int block;
@@ -644,19 +657,19 @@ static void threshold_remove_device(unsi
 }
 
 /* get notified when a cpu comes on/off */
-static int threshold_cpu_callback(struct notifier_block *nfb,
+static int amd_cpu_callback(struct notifier_block *nfb,
 					    unsigned long action, void *hcpu)
 {
 	/* cpu was unsigned int to begin with */
 	unsigned int cpu = (unsigned long)hcpu;
 
-	if (cpu >= NR_CPUS)
-		goto out;
-
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		threshold_create_device(cpu);
+		if (thermal_apic_init_allowed)
+			smp_call_function_single(cpu,
+				&thermal_apic_init, NULL, 1, 0);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
@@ -665,12 +678,11 @@ static int threshold_cpu_callback(struct
 	default:
 		break;
 	}
- out:
 	return NOTIFY_OK;
 }
 
-static struct notifier_block threshold_cpu_notifier = {
-	.notifier_call = threshold_cpu_callback,
+static struct notifier_block amd_cpu_notifier = {
+	.notifier_call = amd_cpu_callback,
 };
 
 static __init int threshold_init_device(void)
@@ -683,8 +695,185 @@ static __init int threshold_init_device(
 		if (err)
 			return err;
 	}
-	register_hotcpu_notifier(&threshold_cpu_notifier);
+	register_hotcpu_notifier(&amd_cpu_notifier);
 	return 0;
 }
 
 device_initcall(threshold_init_device);
+
+/*
+ * AMD-specific thermal interrupt handler.
+ */
+void amd_smp_thermal_interrupt(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	/*
+	 * We're here because thermal throttling has just been activated -- not
+	 * deactivated -- hence therm_throt_process(1).
+	 */
+	if (therm_throt_process(1))
+		mce_log_therm_throt_event(cpu, 1);
+	/*
+	 * We'll still get subsequent interrupts even if we don't clear the
+	 * status bit in THERM_CTL_F3X64. Take advantage of this fact to avoid
+	 * touching PCI space. (If this assumption fails at some point, we'll
+	 * need to schedule_work() in order to enter a process context, so that
+	 * PCI locks can be asserted for proper access. This requirement, in
+	 * turn, creates the need to remember which core interrupted, as the
+	 * core which ultimately takes the scheduled work may be different.
+	 * With any luck, we'll never need to do this.)
+	 */
+}
+
+/*
+ * Initialize each northbridge's thermal throttling logic.
+ */
+static void smp_thermal_northbridge_init(void)
+{
+	int nb_num;
+	u32 therm_ctl_f3x64;
+
+	for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+		/*
+		 * Configure the thermal interrupt for this northbridge.
+		 */
+		pci_read_config_dword(k8_northbridges[nb_num],
+			THERM_CTL_F3X64, &therm_ctl_f3x64);
+		therm_ctl_f3x64 |= PSL_APIC_HI_EN | HTC_EN;
+		therm_ctl_f3x64 &= (~PSL_APIC_LO_EN);
+		pci_write_config_dword(k8_northbridges[nb_num],
+			THERM_CTL_F3X64, therm_ctl_f3x64);
+		printk(KERN_INFO "Northbridge at PCI device 0x%x: "
+			"thermal monitoring enabled.\n", NB_PCI_DEV_BASE +
+			nb_num);
+	}
+}
+
+/*
+ * Enable the delivery of thermal interrupts via the local APIC.
+ */
+static void thermal_apic_init(void *unused)
+{
+	unsigned int apic_lv_therm;
+
+	/* Set up APIC_LVTTHMR to issue THERMAL_APIC_VECTOR. */
+	apic_lv_therm = apic_read(APIC_LVTTHMR);
+	/*
+	 * See if some agent other than this routine has already initialized
+	 * APIC_LVTTHMR, i.e. if it's unmasked, but not equal to the value that
+	 * we would have programmed, had we been here before on this core.
+	 */
+	if ((!(apic_lv_therm & APIC_LVT_MASKED)) && ((apic_lv_therm &
+	    (APIC_MODE_MASK | APIC_VECTOR_MASK)) != (APIC_DM_FIXED |
+	    THERMAL_APIC_VECTOR))) {
+		unsigned int cpu = smp_processor_id();
+
+		printk(KERN_CRIT "CPU 0x%x: Thermal monitoring not "
+			"functional.\n", cpu);
+		if ((apic_lv_therm & APIC_MODE_MASK) == APIC_DM_SMI)
+			printk(KERN_DEBUG "Thermal interrupts already "
+				"handled by SMI according to (((local APIC "
+				"base) + 0x330) bit 0x9).\n");
+		else
+			printk(KERN_DEBUG "Thermal interrupts unexpectedly "
+				"enabled at (((local APIC base) + 0x330) bit "
+				"0x10).\n");
+	} else {
+		/*
+		 * Configure the Local Thermal Vector Table Entry to issue
+		 * thermal interrupts to THERMAL_APIC_VECTOR.
+		 *
+		 * Start by masking off Delivery Mode and Vector.
+		 */
+		apic_lv_therm &= ~(APIC_MODE_MASK | APIC_VECTOR_MASK);
+		/* Fixed interrupt, masked for now. */
+		apic_lv_therm |= APIC_LVT_MASKED | APIC_DM_FIXED |
+			THERMAL_APIC_VECTOR;
+		apic_write(APIC_LVTTHMR, apic_lv_therm);
+		/*
+		 * The Intel thermal kernel code implies that there may be a
+		 * race involving the mask bit, so clear it only now, after
+		 * the other bits have settled.
+		 */
+		apic_write(APIC_LVTTHMR, apic_lv_therm & ~APIC_LVT_MASKED);
+	}
+}
+
+/*
+ * This function is intended to be called just after thermal throttling has
+ * been enabled. It warns the user if throttling is already active, which
+ * could indicate a failed cooling system. It may be the last chance to get
+ * a warning out before thermal shutdown occurs.
+ */
+static void smp_thermal_early_throttle_check(void)
+{
+	int nb_num;
+	u32 therm_ctl_f3x64;
+
+	for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+		/*
+		 * Read back THERM_CTL_F3X64 to check whether HTC_ACTIVE is
+		 * asserted, in which case, warn the user.
+		 */
+		pci_read_config_dword(k8_northbridges[nb_num],
+			THERM_CTL_F3X64, &therm_ctl_f3x64);
+		if (therm_ctl_f3x64 & HTC_ACTIVE)
+			printk(KERN_WARNING "High temperature on northbridge "
+				"at PCI device 0x%x. Throttling enabled.\n",
+				NB_PCI_DEV_BASE + nb_num);
+	}
+}
+
+/*
+ * Determine whether or not the northbridges support thermal throttling
+ * interrupts. If so, initialize them for receiving the same, then perform
+ * corresponding APIC initialization on each core.
+ */
+static int smp_thermal_interrupt_init(void)
+{
+	int nb_num;
+
+	if (num_k8_northbridges == 0)
+		goto out;
+	/*
+	 * If any of the northbridges has PCI ID 0x1103, then its thermal
+	 * hardware suffers from an erratum which prevents this code from
+	 * working, so abort.
+	 */
+	for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+		if ((k8_northbridges[nb_num]->device) == 0x1103)
+			goto out;
+	}
+	/*
+	 * Assert that we should log thermal throttling events, whenever
+	 * we eventually get around to enabling them.
+	 */
+	atomic_set(&therm_throt_en, 1);
+	/*
+	 * Bind cpu_specific_smp_thermal_interrupt() to
+	 * amd_smp_thermal_interrupt().
+	 */
+	cpu_specific_smp_thermal_interrupt = amd_smp_thermal_interrupt;
+	smp_thermal_northbridge_init();
+	/*
+	 * We've now initialized sufficient fabric to permit the
+	 * initialization of the thermal interrupt APIC vectors, such as
+	 * when a core comes online and calls amd_cpu_callback().
+	 */
+	thermal_apic_init_allowed = 1;
+	/*
+	 * Call thermal_apic_init() on each core.
+	 */
+	on_each_cpu(&thermal_apic_init, NULL, 1, 0);
+	smp_thermal_early_throttle_check();
+out:
+	return 0;
+}
+
+/*
+ * smp_thermal_interrupt_init cannot execute until PCI has been fully
+ * initialized, hence late_initcall().
+ */
+late_initcall(smp_thermal_interrupt_init);
+
diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
--- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c	2007-12-11 14:30:53.564303000 -0800
+++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_intel_64.c	2007-12-28 11:40:31.289704000 -0800
@@ -13,21 +13,15 @@
 #include <asm/idle.h>
 #include <asm/therm_throt.h>
 
-asmlinkage void smp_thermal_interrupt(void)
+void intel_smp_thermal_interrupt(void)
 {
 	__u64 msr_val;
 
-	ack_APIC_irq();
-
-	exit_idle();
-	irq_enter();
-
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 	if (therm_throt_process(msr_val & 1))
 		mce_log_therm_throt_event(smp_processor_id(), msr_val);
 
 	add_pda(irq_thermal_count, 1);
-	irq_exit();
 }
 
 static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
@@ -75,6 +69,11 @@ static void __cpuinit intel_init_thermal
 	wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
 
 	l = apic_read(APIC_LVTTHMR);
+	/*
+	 * Bind the cpu_specific_smp_thermal_interrupt trampoline to
+	 * intel_smp_thermal_interrupt.
+	 */
+	cpu_specific_smp_thermal_interrupt = intel_smp_thermal_interrupt;
 	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
 		cpu, tm2 ? "TM2" : "TM1");
diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.c /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.c
--- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.c	1969-12-31 16:00:00.000000000 -0800
+++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.c	2007-12-28 11:41:47.454730000 -0800
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2007 Google Inc.
+ *
+ * Written by Mike Waychison and Russell Leidich
+ * .
+ *
+ * CPU-independent thermal interrupt handler.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include "mce.h"
+#include <asm/apic.h>
+#include <asm/idle.h>
+
+static void default_smp_thermal_interrupt(void)
+{
+	if (printk_ratelimit())
+		printk(KERN_DEBUG "Unexpected thermal throttling interrupt.\n");
+}
+
+cpu_specific_smp_thermal_interrupt_callback_t cpu_specific_smp_thermal_interrupt
+	= default_smp_thermal_interrupt;
+
+/*
+ * Wrapper for the CPU-specific thermal interrupt service routine. Without
+ * this, we'd have to discern the CPU brand at runtime (because support could
+ * be installed for more than one).
+ */
+asmlinkage void smp_thermal_interrupt(void)
+{
+	ack_APIC_irq();
+	exit_idle();
+	irq_enter();
+	cpu_specific_smp_thermal_interrupt();
+	irq_exit();
+}
diff -uprN /home/rml/linux-2.6.24-rc5.orig/include/asm-x86/mce.h /home/rml/linux-2.6.24-rc5/include/asm-x86/mce.h
--- /home/rml/linux-2.6.24-rc5.orig/include/asm-x86/mce.h	2007-12-11 14:31:34.263515000 -0800
+++ /home/rml/linux-2.6.24-rc5/include/asm-x86/mce.h	2007-12-28 11:43:07.659992000 -0800
@@ -113,7 +113,10 @@ static inline void mce_amd_feature_init(
 #endif
 
 void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
+typedef void (*cpu_specific_smp_thermal_interrupt_callback_t)(void);
+extern cpu_specific_smp_thermal_interrupt_callback_t
+	cpu_specific_smp_thermal_interrupt;
 
 extern atomic_t mce_entry;
 
 extern void do_machine_check(struct pt_regs *, long);
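
A note for reviewers: the registration contract introduced by the trampoline is
small enough to show in one place. The sketch below is illustrative only and is
not part of the patch -- example_smp_thermal_interrupt and example_thermal_init
are invented names, while cpu_specific_smp_thermal_interrupt,
therm_throt_process() and mce_log_therm_throt_event() are the real interfaces
from the diff above. A hypothetical third vendor driver would hook in like so:

/*
 * Illustrative sketch (not part of the patch): a hypothetical vendor
 * handler binds the trampoline the same way mce_intel_64.c and
 * mce_amd_64.c do above.
 */
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/mce.h>
#include <asm/therm_throt.h>

/*
 * Runs from smp_thermal_interrupt() between irq_enter() and irq_exit(),
 * with the APIC already acknowledged: keep it short, don't sleep, and
 * don't touch PCI configuration space here.
 */
static void example_smp_thermal_interrupt(void)
{
	if (therm_throt_process(1))
		mce_log_therm_throt_event(smp_processor_id(), 1);
}

static int __init example_thermal_init(void)
{
	/* Bind the trampoline; thermal interrupts land here from now on. */
	cpu_specific_smp_thermal_interrupt = example_smp_thermal_interrupt;
	return 0;
}
late_initcall(example_thermal_init);

The vendor-specific APIC LVT programming and hotplug plumbing stay with the
vendor init code, as in the AMD and Intel files above; the indirection exists
precisely so that the generic wrapper never needs to know the CPU brand.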