--- linux-2.6.20-rc1-mm1/include/linux/time.h	2006-12-15 15:46:16.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/include/linux/time.h	2006-12-15 17:22:15.000000000 +0100
@@ -88,22 +88,46 @@ static inline struct timespec timespec_s
 #define timespec_valid(ts) \
 	(((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
 
-extern struct timespec xtime;
-extern struct timespec wall_to_monotonic;
-extern seqlock_t xtime_lock __attribute__((weak));
+
+/*
+ * Structure time_data holds most timekeeping-related variables
+ * so that they share as few cache lines as possible.
+ */
+struct time_data {
+	/*
+	 * We want to alias jiffies_64 to time_data._jiffies_64
+	 * in the vmlinux.lds.S files (jiffies_64 = time_data),
+	 * so keep _jiffies_64 at the beginning of time_data.
+	 */
+	u64 _jiffies_64;
+	struct timespec _xtime;
+	struct timespec _wall_to_monotonic;
+	seqlock_t lock;
+	int calc_load_count;
+	unsigned int _avenrun[3];
+};
+/*
+ * time_data is declared weak because some arches (x86_64) want
+ * a read-only version of it in their vsyscall page.
+ */
+extern struct time_data time_data __attribute__((weak));
+#define xtime			time_data._xtime
+#define wall_to_monotonic	time_data._wall_to_monotonic
+#define xtime_lock		time_data.lock
+#define avenrun			time_data._avenrun
 
 extern unsigned long read_persistent_clock(void);
 void timekeeping_init(void);
 
 static inline unsigned long get_seconds(void)
 {
-	return xtime.tv_sec;
+	return time_data._xtime.tv_sec;
 }
 
 struct timespec current_kernel_time(void);
 
 #define CURRENT_TIME		(current_kernel_time())
-#define CURRENT_TIME_SEC	((struct timespec) { xtime.tv_sec, 0 })
+#define CURRENT_TIME_SEC	((struct timespec) { time_data._xtime.tv_sec, 0 })
 
 extern void do_gettimeofday(struct timeval *tv);
 extern int do_settimeofday(struct timespec *tv);
--- linux-2.6.20-rc1-mm1/include/linux/jiffies.h	2006-12-15 15:46:16.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/include/linux/jiffies.h	2006-12-15 17:19:07.000000000 +0100
@@ -78,7 +78,7 @@
  * without sampling the sequence number in xtime_lock.
  * get_jiffies_64() will do this for you as appropriate.
  */
-extern u64 __jiffy_data jiffies_64;
+extern u64 __jiffy_data jiffies_64 __attribute__((weak));
 
 extern unsigned long volatile __jiffy_data jiffies;
 
 #if (BITS_PER_LONG < 64)
--- linux-2.6.20-rc1-mm1/include/linux/sched.h	2006-12-15 15:46:16.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/include/linux/sched.h	2006-12-15 17:19:07.000000000 +0100
@@ -106,7 +106,6 @@ struct bio;
  * the EXP_n values would be 1981, 2034 and 2043 if still using only
  * 11 bit fractions.
  */
-extern unsigned long avenrun[];		/* Load averages */
 
 #define FSHIFT		11		/* nr of bits of precision */
 #define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
@@ … @@
 #define CALC_LOAD(load,exp,n) \
-	load *= exp; \
-	load += n*(FIXED_1-exp); \
-	load >>= FSHIFT;
+	{ \
+		unsigned long temp = load; \
+		temp *= exp; \
+		temp += n*(FIXED_1-exp); \
+		load = temp >> FSHIFT; \
+	}
 
 extern unsigned long total_forks;
 extern int nr_threads;
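Why the CALC_LOAD() change: _avenrun[] shrinks from unsigned long to unsigned int to keep time_data small, so the macro now does the multiply in an unsigned long temporary to preserve the width of the intermediate product, and the braces make the macro expand as a single safe statement. Below is a minimal userspace sketch of the fixed-point update it performs -- the constants are copied from sched.h, the three-runnable-tasks scenario is invented:

#include <stdio.h>

#define FSHIFT		11			/* nr of bits of precision */
#define FIXED_1		(1 << FSHIFT)		/* 1.0 as fixed-point */
#define EXP_1		1884			/* 1/exp(5sec/1min) */
#define LOAD_INT(x)	((x) >> FSHIFT)
#define LOAD_FRAC(x)	LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

#define CALC_LOAD(load, exp, n) \
	{ \
		unsigned long temp = load; \
		temp *= exp; \
		temp += n * (FIXED_1 - exp); \
		load = temp >> FSHIFT; \
	}

int main(void)
{
	unsigned int avenrun_1min = 0;		/* mirrors the new u32 _avenrun[0] */
	unsigned long active = 3 * FIXED_1;	/* pretend 3 runnable tasks */
	int i;

	/* one CALC_LOAD per LOAD_FREQ (5 sec) interval, so 24 of them
	 * cover 2 minutes; the printed average converges toward 3.00 */
	for (i = 0; i < 24; i++) {
		CALC_LOAD(avenrun_1min, EXP_1, active);
		printf("%2d: %lu.%02lu\n", i,
		       (unsigned long)LOAD_INT(avenrun_1min),
		       (unsigned long)LOAD_FRAC(avenrun_1min));
	}
	return 0;
}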
--- linux-2.6.20-rc1-mm1/kernel/timer.c	2006-12-15 15:46:16.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/kernel/timer.c	2006-12-15 17:19:07.000000000 +0100
@@ -42,7 +42,7 @@
 #include <asm/timex.h>
 #include <asm/io.h>
 
-u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
+__attribute__((weak)) u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
 
 EXPORT_SYMBOL(jiffies_64);
 
@@ -771,10 +771,12 @@ unsigned long next_timer_interrupt(void)
  * however, we will ALWAYS keep the tv_nsec part positive so we can use
  * the usual normalization.
  */
-struct timespec xtime __attribute__ ((aligned (16)));
-struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
-
-EXPORT_SYMBOL(xtime);
+__attribute__((weak)) struct time_data time_data __cacheline_aligned = {
+	._jiffies_64 = INITIAL_JIFFIES,
+	.lock = __SEQLOCK_UNLOCKED(time_data.lock),
+	.calc_load_count = LOAD_FREQ,
+};
+EXPORT_SYMBOL(time_data);
 
 /* XXX - all of this timekeeping code should be later moved to time.c */
 
@@ -1238,9 +1240,6 @@ static unsigned long count_active_tasks(
  *
  * Requires xtime_lock to access.
  */
-unsigned long avenrun[3];
-
-EXPORT_SYMBOL(avenrun);
 
 /*
  * calc_load - given tick count, update the avenrun load estimates.
@@ -1249,29 +1248,20 @@ EXPORT_SYMBOL(avenrun);
 static inline void calc_load(unsigned long ticks)
 {
 	unsigned long active_tasks; /* fixed-point */
-	static int count = LOAD_FREQ;
 
-	count -= ticks;
-	if (unlikely(count < 0)) {
+	time_data.calc_load_count -= ticks;
+	if (unlikely(time_data.calc_load_count < 0)) {
 		active_tasks = count_active_tasks();
 		do {
-			CALC_LOAD(avenrun[0], EXP_1, active_tasks);
-			CALC_LOAD(avenrun[1], EXP_5, active_tasks);
-			CALC_LOAD(avenrun[2], EXP_15, active_tasks);
-			count += LOAD_FREQ;
-		} while (count < 0);
+			CALC_LOAD(time_data._avenrun[0], EXP_1, active_tasks);
+			CALC_LOAD(time_data._avenrun[1], EXP_5, active_tasks);
+			CALC_LOAD(time_data._avenrun[2], EXP_15, active_tasks);
+			time_data.calc_load_count += LOAD_FREQ;
+		} while (time_data.calc_load_count < 0);
 	}
 }
 
 /*
- * This read-write spinlock protects us from races in SMP while
- * playing with xtime and avenrun.
- */
-__attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
-
-EXPORT_SYMBOL(xtime_lock);
-
-/*
  * This function runs timers and the timer-tq in bottom half context.
  */
 static void run_timer_softirq(struct softirq_action *h)
@@ -1497,6 +1487,16 @@ asmlinkage long sys_gettid(void)
 }
 
 /**
+ * avenrun_to_loads - convert an _avenrun[] entry to sysinfo.loads[] scale
+ * @idx: index (0..2) in time_data._avenrun[]
+ */
+static inline unsigned long avenrun_to_loads(unsigned int idx)
+{
+	unsigned long ret = time_data._avenrun[idx];
+	return ret << (SI_LOAD_SHIFT - FSHIFT);
+}
+
+/**
  * sys_sysinfo - fill in sysinfo struct
  * @info: pointer to buffer to fill
  */
@@ -1529,9 +1529,9 @@ asmlinkage long sys_sysinfo(struct sysin
 		}
 		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
 
-		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+		val.loads[0] = avenrun_to_loads(0);
+		val.loads[1] = avenrun_to_loads(1);
+		val.loads[2] = avenrun_to_loads(2);
 
 		val.procs = nr_threads;
 	} while (read_seqretry(&xtime_lock, seq));
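avenrun_to_loads() above is pure rescaling: sysinfo.loads[] represents 1.0 as 1 << SI_LOAD_SHIFT (65536) while _avenrun[] uses 1 << FSHIFT (2048), so each entry is shifted left by 5. A standalone sketch of the same arithmetic, taking the value rather than the array index (the demo_ names are mine, not kernel API):

#include <stdio.h>

#define FSHIFT		11
#define SI_LOAD_SHIFT	16

static unsigned long demo_avenrun_to_loads(unsigned int avenrun_val)
{
	unsigned long ret = avenrun_val;	/* widen first, as the patch does */

	return ret << (SI_LOAD_SHIFT - FSHIFT);
}

int main(void)
{
	unsigned int one_min = 3 * (1 << FSHIFT);	/* 3.00 in FSHIFT scale */

	/* 6144 << 5 = 196608 = 3 * 65536, i.e. 3.00 in sysinfo scale */
	printf("loads[0] = %lu\n", demo_avenrun_to_loads(one_min));
	return 0;
}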
--- linux-2.6.20-rc1-mm1/include/asm-x86_64/vsyscall.h	2006-12-15 15:46:16.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/include/asm-x86_64/vsyscall.h	2006-12-15 17:19:07.000000000 +0100
@@ -18,11 +18,9 @@ enum vsyscall_num {
 
 #define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
 #define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16)))
-#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
 #define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
 #define __section_sysctl_vsyscall __attribute__ ((unused, __section__ (".sysctl_vsyscall"), aligned(16)))
-#define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16)))
-#define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(16)))
+#define __section_time_data __attribute__ ((unused, __section__ (".time_data"), aligned(16)))
 
 #define VXTIME_TSC	1
 #define VXTIME_HPET	2
@@ -44,12 +42,10 @@ struct vxtime_data {
 #define hpet_writel(d,a)	writel(d, (void __iomem *)fix_to_virt(FIX_HPET_BASE) + a)
 
 /* vsyscall space (readonly) */
+extern struct time_data __time_data;
 extern struct vxtime_data __vxtime;
 extern int __vgetcpu_mode;
-extern struct timespec __xtime;
-extern volatile unsigned long __jiffies;
 extern struct timezone __sys_tz;
-extern seqlock_t __xtime_lock;
 
 /* kernel space (writeable) */
 extern struct vxtime_data vxtime;
--- linux-2.6.20-rc1-mm1/arch/x86_64/kernel/vsyscall.c	2006-12-14 02:14:23.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/arch/x86_64/kernel/vsyscall.c	2006-12-15 17:19:07.000000000 +0100
@@ -45,7 +45,6 @@
 #define __syscall_clobber "r11","rcx","memory"
 
 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
-seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
 int __vgetcpu_mode __section_vgetcpu_mode;
 
 #include <asm/unistd.h>
 
@@ -67,10 +66,10 @@ static __always_inline void do_vgettimeo
 	unsigned long sec, usec;
 
 	do {
-		sequence = read_seqbegin(&__xtime_lock);
+		sequence = read_seqbegin(&__time_data.lock);
 
-		sec = __xtime.tv_sec;
-		usec = __xtime.tv_nsec / 1000;
+		sec = __time_data._xtime.tv_sec;
+		usec = __time_data._xtime.tv_nsec / 1000;
 
 		if (__vxtime.mode != VXTIME_HPET) {
 			t = get_cycles_sync();
@@ -84,7 +83,7 @@ static __always_inline void do_vgettimeo
 				  fix_to_virt(VSYSCALL_HPET) + 0xf0) -
 					__vxtime.last) * __vxtime.quot) >> 32;
 		}
-	} while (read_seqretry(&__xtime_lock, sequence));
+	} while (read_seqretry(&__time_data.lock, sequence));
 
 	tv->tv_sec = sec + usec / 1000000;
 	tv->tv_usec = usec % 1000000;
@@ -132,8 +131,8 @@ time_t __vsyscall(1) vtime(time_t *t)
 	if (!__sysctl_vsyscall)
 		return time_syscall(t);
 	else if (t)
-		*t = __xtime.tv_sec;
-	return __xtime.tv_sec;
+		*t = __time_data._xtime.tv_sec;
+	return __time_data._xtime.tv_sec;
 }
 
 /* Fast way to get current CPU and node.
@@ -158,7 +157,7 @@ vgetcpu(unsigned *cpu, unsigned *node, s
    We do this here because otherwise user space would do it on its
    own in a likely inferior way (no access to jiffies).
    If you don't like it pass NULL. */
-	if (tcache && tcache->blob[0] == (j = __jiffies)) {
+	if (tcache && tcache->blob[0] == (j = __time_data._jiffies_64)) {
 		p = tcache->blob[1];
 	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
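do_vgettimeofday() and vtime() keep the same lockless read pattern as before; only the seqlock moved into __time_data.lock. For readers who have not met seqlocks, here is a deliberately simplified, self-contained sketch of the retry idea (no memory barriers, single-threaded driver -- not the kernel implementation):

#include <stdio.h>

struct demo_seq {
	unsigned int seq;	/* odd while a writer is mid-update */
	unsigned long value;	/* stands in for _xtime/_jiffies_64/... */
};

static unsigned int demo_read_begin(const struct demo_seq *s)
{
	unsigned int seq;

	do {
		seq = s->seq;	/* the kernel adds smp_rmb()/cpu_relax() here */
	} while (seq & 1);	/* spin while a write is in progress */
	return seq;
}

static int demo_read_retry(const struct demo_seq *s, unsigned int seq)
{
	return s->seq != seq;	/* sequence moved -> snapshot is torn, retry */
}

int main(void)
{
	struct demo_seq s = { .seq = 0, .value = 42 };
	unsigned long snapshot;
	unsigned int seq;

	do {
		seq = demo_read_begin(&s);
		snapshot = s.value;	/* lockless read of the shared data */
	} while (demo_read_retry(&s, seq));

	printf("read %lu at seq %u\n", snapshot, seq);
	return 0;
}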
--- linux-2.6.20-rc1-mm1/arch/x86_64/kernel/time.c	2006-12-15 15:46:15.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/arch/x86_64/kernel/time.c	2006-12-15 17:19:07.000000000 +0100
@@ -76,8 +76,12 @@ unsigned long long monotonic_base;
 
 struct vxtime_data __vxtime __section_vxtime;	/* for vsyscalls */
 
-volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
-struct timespec __xtime __section_xtime;
+struct time_data __time_data __section_time_data = {
+	._jiffies_64 = INITIAL_JIFFIES,
+	.lock = __SEQLOCK_UNLOCKED(time_data.lock),
+	.calc_load_count = LOAD_FREQ,
+};
+
 struct timezone __sys_tz __section_sys_tz;
 
 /*
--- linux-2.6.20-rc1-mm1/arch/x86_64/kernel/vmlinux.lds.S	2006-12-15 15:46:15.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/arch/x86_64/kernel/vmlinux.lds.S	2006-12-15 17:22:15.000000000 +0100
@@ -12,7 +12,6 @@
 OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
 OUTPUT_ARCH(i386:x86-64)
 ENTRY(phys_startup_64)
-jiffies_64 = jiffies;
 _proxy_pda = 0;
 PHDRS {
 	text PT_LOAD FLAGS(5);	/* R_E */
@@ -88,8 +87,10 @@ SECTIONS
   __vsyscall_0 = VSYSCALL_VIRT_ADDR;
 
   . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-  .xtime_lock : AT(VLOAD(.xtime_lock)) { *(.xtime_lock) }
-  xtime_lock = VVIRT(.xtime_lock);
+  .time_data : AT(VLOAD(.time_data)) { *(.time_data) }
+  time_data = VVIRT(.time_data);
+  jiffies = time_data;
+  jiffies_64 = time_data;
 
   .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
   vxtime = VVIRT(.vxtime);
@@ -103,13 +104,6 @@ SECTIONS
   .sysctl_vsyscall : AT(VLOAD(.sysctl_vsyscall)) { *(.sysctl_vsyscall) }
   sysctl_vsyscall = VVIRT(.sysctl_vsyscall);
 
-  .xtime : AT(VLOAD(.xtime)) { *(.xtime) }
-  xtime = VVIRT(.xtime);
-
-  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-  .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
-  jiffies = VVIRT(.jiffies);
-
   .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { *(.vsyscall_1) }
   .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { *(.vsyscall_2) }
   .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { *(.vsyscall_3) }
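This hunk is why time_data and jiffies_64 are declared weak: the x86_64 linker script supplies strong definitions of time_data/jiffies/jiffies_64 that point into the vsyscall area, and the generic weak copies from kernel/timer.c are discarded at link time. A minimal userspace illustration of the weak-vs-strong rule, using hypothetical files a.c, b.c and main.c:

/* a.c - weak definition, plays the role of kernel/timer.c's time_data */
long counter __attribute__((weak)) = 1;

/* b.c - strong definition, plays the role of the arch override */
long counter = 2;

/* main.c */
#include <stdio.h>

extern long counter;

int main(void)
{
	/* gcc main.c a.c b.c && ./a.out prints 2: the linker keeps the
	 * strong definition and silently drops the weak one */
	printf("%ld\n", counter);
	return 0;
}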
--- linux-2.6.20-rc1-mm1/arch/i386/kernel/vmlinux.lds.S	2006-12-15 15:46:15.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/arch/i386/kernel/vmlinux.lds.S	2006-12-15 17:22:15.000000000 +0100
@@ -25,7 +25,8 @@
 OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH(i386)
 ENTRY(phys_startup_32)
-jiffies = jiffies_64;
+jiffies = time_data;
+jiffies_64 = time_data;
 _proxy_pda = 0;
 
 PHDRS {
--- linux-2.6.20-rc1-mm1/arch/ia64/kernel/asm-offsets.c	2006-12-14 02:14:23.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/arch/ia64/kernel/asm-offsets.c	2006-12-15 17:19:07.000000000 +0100
@@ -268,4 +268,7 @@ void foo(void)
 	DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
 	DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
 	DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
+	DEFINE(IA64_XTIME_OFFSET, offsetof (struct time_data, _xtime));
+	DEFINE(IA64_WALL_TO_MONOTONIC_OFFSET, offsetof (struct time_data, _wall_to_monotonic));
+	DEFINE(IA64_XTIME_LOCK_OFFSET, offsetof (struct time_data, lock));
 }
--- linux-2.6.20-rc1-mm1/arch/ia64/kernel/vmlinux.lds.S	2006-12-15 15:46:15.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/arch/ia64/kernel/vmlinux.lds.S	2006-12-15 17:19:07.000000000 +0100
@@ -4,6 +4,7 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
+#include <asm/asm-offsets.h>
 
 #define LOAD_OFFSET	(KERNEL_START - KERNEL_TR_PAGE_SIZE)
 #include <asm-generic/vmlinux.lds.h>
@@ -16,7 +17,11 @@
 OUTPUT_FORMAT("elf64-ia64-little")
 OUTPUT_ARCH(ia64)
 ENTRY(phys_start)
-jiffies = jiffies_64;
+jiffies = time_data;
+jiffies_64 = time_data;
+xtime = time_data + IA64_XTIME_OFFSET;
+wall_to_monotonic = time_data + IA64_WALL_TO_MONOTONIC_OFFSET;
+xtime_lock = time_data + IA64_XTIME_LOCK_OFFSET;
 PHDRS {
   code   PT_LOAD;
   percpu PT_LOAD;
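The asm-offsets additions exist because a linker script cannot evaluate offsetof(): the member offsets have to reach vmlinux.lds.S as plain integers so that "xtime = time_data + IA64_XTIME_OFFSET" resolves xtime to an address inside time_data. A sketch of the values being exported, with stand-in types (the demo_ structs below are illustrative, not the kernel definitions):

#include <stdio.h>
#include <stddef.h>

typedef unsigned long long u64;
struct demo_timespec { long tv_sec; long tv_nsec; };

struct demo_time_data {
	u64 _jiffies_64;	/* must stay first: jiffies_64 = time_data */
	struct demo_timespec _xtime;
	struct demo_timespec _wall_to_monotonic;
	/* seqlock, calc_load_count and _avenrun[] follow in the real struct */
};

int main(void)
{
	/* these are the numbers that DEFINE(IA64_XTIME_OFFSET, ...) etc.
	 * feed into the ia64 linker script */
	printf("IA64_XTIME_OFFSET             = %zu\n",
	       offsetof(struct demo_time_data, _xtime));
	printf("IA64_WALL_TO_MONOTONIC_OFFSET = %zu\n",
	       offsetof(struct demo_time_data, _wall_to_monotonic));
	return 0;
}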
--- linux-2.6.20-rc1-mm1/arch/s390/appldata/appldata_os.c	2006-12-14 02:14:23.000000000 +0100
+++ linux-2.6.20-rc1-mm1-ed/arch/s390/appldata/appldata_os.c	2006-12-15 17:19:07.000000000 +0100
@@ -68,7 +68,7 @@ struct appldata_os_data {
 
 	u32 nr_running;		/* number of runnable threads */
 	u32 nr_threads;		/* number of threads */
-	u32 avenrun[3];		/* average nr. of running processes during */
+	u32 _avenrun[3];	/* average nr. of running processes during */
 				/* the last 1, 5 and 15 minutes */
 
 	/* New in 2.6 */
@@ -98,11 +98,11 @@ static inline void appldata_print_debug(
 	P_DEBUG("nr_threads   = %u\n", os_data->nr_threads);
 	P_DEBUG("nr_running   = %u\n", os_data->nr_running);
 	P_DEBUG("nr_iowait    = %u\n", os_data->nr_iowait);
-	P_DEBUG("avenrun(int) = %8x / %8x / %8x\n", os_data->avenrun[0],
-		os_data->avenrun[1], os_data->avenrun[2]);
-	a0 = os_data->avenrun[0];
-	a1 = os_data->avenrun[1];
-	a2 = os_data->avenrun[2];
+	P_DEBUG("avenrun(int) = %8x / %8x / %8x\n", os_data->_avenrun[0],
+		os_data->_avenrun[1], os_data->_avenrun[2]);
+	a0 = os_data->_avenrun[0];
+	a1 = os_data->_avenrun[1];
+	a2 = os_data->_avenrun[2];
 	P_DEBUG("avenrun(float) = %d.%02d / %d.%02d / %d.%02d\n",
 		LOAD_INT(a0), LOAD_FRAC(a0), LOAD_INT(a1), LOAD_FRAC(a1),
 		LOAD_INT(a2), LOAD_FRAC(a2));
@@ -145,9 +145,9 @@ static void appldata_get_os_data(void *d
 	os_data->nr_threads	= nr_threads;
 	os_data->nr_running	= nr_running();
 	os_data->nr_iowait	= nr_iowait();
-	os_data->avenrun[0]	= avenrun[0] + (FIXED_1/200);
-	os_data->avenrun[1]	= avenrun[1] + (FIXED_1/200);
-	os_data->avenrun[2]	= avenrun[2] + (FIXED_1/200);
+	os_data->_avenrun[0]	= avenrun[0] + (FIXED_1/200);
+	os_data->_avenrun[1]	= avenrun[1] + (FIXED_1/200);
+	os_data->_avenrun[2]	= avenrun[2] + (FIXED_1/200);
 
 	j = 0;
 	for_each_online_cpu(i) {
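Note: the struct member is renamed to _avenrun[] here only because avenrun is now a macro in time.h and would otherwise be expanded inside the struct definition. The FIXED_1/200 term that appldata adds is a rounding bias: it is roughly 0.005 in FSHIFT fixed point, so the truncating LOAD_INT/LOAD_FRAC pair ends up rounding to the nearest hundredth instead of always rounding down. A standalone check of the arithmetic:

#include <stdio.h>

#define FSHIFT		11
#define FIXED_1		(1 << FSHIFT)
#define LOAD_INT(x)	((x) >> FSHIFT)
#define LOAD_FRAC(x)	LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

int main(void)
{
	unsigned int raw = 1044;			/* 1044/2048 = 0.5098 */
	unsigned int rounded = raw + (FIXED_1 / 200);	/* + 10, i.e. ~0.005 */

	printf("truncated: %u.%02u\n", LOAD_INT(raw), LOAD_FRAC(raw));		/* 0.50 */
	printf("rounded  : %u.%02u\n", LOAD_INT(rounded), LOAD_FRAC(rounded));	/* 0.51 */
	return 0;
}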