[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20070822003834.GB1400@Krystal>
Date: Tue, 21 Aug 2007 20:38:34 -0400
From: Mathieu Desnoyers <mathieu.desnoyers@...ymtl.ca>
To: Andi Kleen <andi@...stfloor.org>
Cc: Christoph Lameter <clameter@....com>, akpm@...ux-foundation.org,
linux-kernel@...r.kernel.org, mingo@...hat.com
Subject: Re: [PATCH] SLUB use cmpxchg_local
* Andi Kleen (andi@...stfloor.org) wrote:
> Mathieu Desnoyers <mathieu.desnoyers@...ymtl.ca> writes:
> >
> > The measurements I get (in cycles):
> > enable interrupts (STI) disable interrupts (CLI) local CMPXCHG
> > IA32 (P4) 112 82 26
> > x86_64 AMD64 125 102 19
>
> What exactly did you benchmark here? On K8 CLI/STI are only supposed
> to be a few cycles. pushf/popf might me more expensive, but not that much.
>
Hi Andi,
I benchmarked cmpxchg_local vs local_irq_save/local_irq_restore.
Details, and code, follow.
* cpuinfo:
processor : 0
vendor_id : AuthenticAMD
cpu family : 15
model : 35
model name : AMD Athlon(tm)64 X2 Dual Core Processor 3800+
stepping : 2
cpu MHz : 2009.204
cache size : 512 KB
physical id : 0
siblings : 2
core id : 0
cpu cores : 2
fpu : yes
fpu_exception : yes
cpuid level : 1
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt lm 3dnowext 3dnow pni lahf_lm cmp_legacy
bogomips : 4023.38
TLB size : 1024 4K pages
clflush size : 64
cache_alignment : 64
address sizes : 40 bits physical, 48 bits virtual
power management: ts fid vid ttp
processor : 1
vendor_id : AuthenticAMD
cpu family : 15
model : 35
model name : AMD Athlon(tm)64 X2 Dual Core Processor 3800+
stepping : 2
cpu MHz : 2009.204
cache size : 512 KB
physical id : 0
siblings : 2
core id : 1
cpu cores : 2
fpu : yes
fpu_exception : yes
cpuid level : 1
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt lm 3dnowext 3dnow pni lahf_lm cmp_legacy
bogomips : 4018.49
TLB size : 1024 4K pages
clflush size : 64
cache_alignment : 64
address sizes : 40 bits physical, 48 bits virtual
power management: ts fid vid ttp
* Test ran:
/* test-cmpxchg-nolock.c
*
* Compare local cmpxchg with irq disable / enable.
*/
#include <linux/jiffies.h>
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/calc64.h>
#include <asm/timex.h>
#include <asm/system.h>
#define NR_LOOPS 20000
int test_val;
static void do_test_cmpxchg(void)
{
int ret;
long flags;
unsigned int i;
cycles_t time1, time2, time;
long rem;
local_irq_save(flags);
preempt_disable();
time1 = get_cycles();
for (i = 0; i < NR_LOOPS; i++) {
ret = cmpxchg_local(&test_val, 0, 0);
}
time2 = get_cycles();
local_irq_restore(flags);
preempt_enable();
time = time2 - time1;
printk(KERN_ALERT "test results: time for non locked cmpxchg\n");
printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
printk(KERN_ALERT "total time: %llu\n", time);
time = div_long_long_rem(time, NR_LOOPS, &rem);
printk(KERN_ALERT "-> non locked cmpxchg takes %llu cycles\n", time);
printk(KERN_ALERT "test end\n");
}
/*
* This test will have a higher standard deviation due to incoming interrupts.
*/
static void do_test_enable_int(void)
{
long flags;
unsigned int i;
cycles_t time1, time2, time;
long rem;
local_irq_save(flags);
preempt_disable();
time1 = get_cycles();
for (i = 0; i < NR_LOOPS; i++) {
local_irq_restore(flags);
}
time2 = get_cycles();
local_irq_restore(flags);
preempt_enable();
time = time2 - time1;
printk(KERN_ALERT "test results: time for enabling interrupts (STI)\n");
printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
printk(KERN_ALERT "total time: %llu\n", time);
time = div_long_long_rem(time, NR_LOOPS, &rem);
printk(KERN_ALERT "-> enabling interrupts (STI) takes %llu cycles\n",
time);
printk(KERN_ALERT "test end\n");
}
static void do_test_disable_int(void)
{
unsigned long flags, flags2;
unsigned int i;
cycles_t time1, time2, time;
long rem;
local_irq_save(flags);
preempt_disable();
time1 = get_cycles();
for ( i = 0; i < NR_LOOPS; i++) {
local_irq_save(flags2);
}
time2 = get_cycles();
local_irq_restore(flags);
preempt_enable();
time = time2 - time1;
printk(KERN_ALERT "test results: time for disabling interrupts (CLI)\n");
printk(KERN_ALERT "number of loops: %d\n", NR_LOOPS);
printk(KERN_ALERT "total time: %llu\n", time);
time = div_long_long_rem(time, NR_LOOPS, &rem);
printk(KERN_ALERT "-> disabling interrupts (CLI) takes %llu cycles\n",
time);
printk(KERN_ALERT "test end\n");
}
static int ltt_test_init(void)
{
printk(KERN_ALERT "test init\n");
do_test_cmpxchg();
do_test_enable_int();
do_test_disable_int();
return -EAGAIN; /* Fail will directly unload the module */
}
static void ltt_test_exit(void)
{
printk(KERN_ALERT "test exit\n");
}
module_init(ltt_test_init)
module_exit(ltt_test_exit)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mathieu Desnoyers");
MODULE_DESCRIPTION("Cmpxchg vs int Test");
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists