[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20070219231447.GA4400@outpost.ds9a.nl>
Date: Tue, 20 Feb 2007 00:14:47 +0100
From: bert hubert <bert.hubert@...herlabs.nl>
To: netdev@...r.kernel.org
Subject: nonblocking UDPv4 recvfrom() taking 4usec @ 3GHz?
Hi people,
I'm trying to save people the cost of buying extra servers by making
PowerDNS (GPL) ever faster, but I've hit a rather fundamental problem.
Linux 2.6.20-rc4 appears to take 4 microseconds on my P4 3GHz for a
non-blocking UDPv4 recvfrom() call, both on loopback and ethernet.
Linux 2.6.18 on my 64 bit Athlon64 3200+ takes a similar amount of time.
This seems like rather a lot for a 50 byte datagram, but perhaps I'm
overestimating your abilities :-)
The program is unthreaded, and I measure like this:
/*
 * RDTSC(qp): execute the x86 `rdtsc` instruction and store the 64-bit
 * result in qp.
 *
 * The "=a" and "=d" output constraints bind lowPart and highPart to the
 * EAX and EDX registers, which is where `rdtsc` leaves the low and high
 * 32 bits of the counter. The halves are then combined as
 * (highPart << 32) | lowPart.
 *
 * The body is wrapped in do { ... } while (0) so that the expansion
 * behaves as a single statement.
 *
 * NOTE(review): qp is expanded unparenthesized as the assignment target,
 * so pass a plain 64-bit lvalue (not named lowPart or highPart).
 */
#define RDTSC(qp) \
do { \
unsigned long lowPart, highPart; \
__asm__ __volatile__("rdtsc" : "=a" (lowPart), "=d" (highPart)); \
qp = (((unsigned long long) highPart) << 32) | lowPart; \
} while (0)
...
uint64_t tsc1, tsc2;
RDTSC(tsc1);
if((len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen)) >= 0) {
RDTSC(tsc2);
printf("%f\n", (tsc2-tsc1)/3000.0); // 3GHz P4
}
gdb generates the following dump from the actual program
(x=_Z20handleNewUDPQuestioniRN5boost3anyE). I see nothing untoward happening
between the two 'rdtsc' opcodes.
0x08091de0 <x+0>: push %ebp
0x08091de1 <x+1>: mov %esp,%ebp
0x08091de3 <x+3>: push %edi
0x08091de4 <x+4>: push %esi
0x08091de5 <x+5>: push %ebx
0x08091de6 <x+6>: sub $0x78c,%esp
0x08091dec <x+12>: mov %gs:0x14,%eax
0x08091df2 <x+18>: mov %eax,0xffffffe4(%ebp)
0x08091df5 <x+21>: xor %eax,%eax
0x08091df7 <x+23>: movw $0x2,0xffffffac(%ebp)
0x08091dfd <x+29>: movl $0x0,0xffffffb0(%ebp)
0x08091e04 <x+36>: movw $0x0,0xffffffae(%ebp)
0x08091e0a <x+42>: movl $0x1c,0xfffff8f4(%ebp)
0x08091e14 <x+52>: rdtsc
0x08091e16 <x+54>: mov %edx,%ebx
0x08091e18 <x+56>: mov 0x8(%ebp),%edx
0x08091e1b <x+59>: mov %eax,%esi
0x08091e1d <x+61>: lea 0xfffff8f4(%ebp),%eax
0x08091e23 <x+67>: mov %eax,0x14(%esp)
0x08091e27 <x+71>: lea 0xffffffac(%ebp),%ecx
0x08091e2a <x+74>: lea 0xfffff950(%ebp),%eax
0x08091e30 <x+80>: mov %ecx,0x10(%esp)
0x08091e34 <x+84>: movl $0x0,0xc(%esp)
0x08091e3c <x+92>: movl $0x5dc,0x8(%esp)
0x08091e44 <x+100>: mov %eax,0x4(%esp)
0x08091e48 <x+104>: mov %edx,(%esp)
0x08091e4b <x+107>: call 0x8192110 <recvfrom>
0x08091e50 <x+112>: test %eax,%eax
0x08091e52 <x+114>: mov %eax,0xfffff8b0(%ebp)
0x08091e58 <x+120>: js 0x8092168 <x+904>
0x08091e5e <x+126>: mov %ebx,%eax
0x08091e60 <x+128>: xor %edx,%edx
0x08091e62 <x+130>: mov %eax,%edx
0x08091e64 <x+132>: mov $0x0,%eax
0x08091e69 <x+137>: mov %esi,%ecx
0x08091e6b <x+139>: mov %eax,%esi
0x08091e6d <x+141>: or %ecx,%esi
0x08091e6f <x+143>: mov %edx,%edi
0x08091e71 <x+145>: rdtsc
0x08091e73 <x+147>: mov %eax,0xfffff8a0(%ebp)
0x08091e79 <x+153>: mov 0xfffff8a0(%ebp),%eax
0x08091e7f <x+159>: mov %edx,%ecx
0x08091e81 <x+161>: xor %ebx,%ebx
0x08091e83 <x+163>: mov %ecx,%ebx
recvfrom itself is a tad worrisome (in the dump below, x=recvfrom). I didn't
ask for the 'libc_enable_asynccancel' stuff. I'm trying to isolate the actual
syscall but it is proving hard work for an assembly newbie like me - socketcall
doesn't make things easier.
0xb7d62410 <x+0>: cmpl $0x0,%gs:0xc
0xb7d62418 <x+8>: jne 0xb7d62439 <x+41>
0xb7d6241a <x+10>: mov %ebx,%edx
0xb7d6241c <x+12>: mov $0x66,%eax
0xb7d62421 <x+17>: mov $0xc,%ebx
0xb7d62426 <x+22>: lea 0x4(%esp),%ecx
0xb7d6242a <x+26>: call *%gs:0x10
0xb7d62431 <x+33>: mov %edx,%ebx
0xb7d62433 <x+35>: cmp $0xffffff83,%eax
0xb7d62436 <x+38>: jae 0xb7d62469 <x+89>
0xb7d62438 <x+40>: ret
0xb7d62439 <x+41>: push %esi
0xb7d6243a <x+42>: call 0xb7d6ddd0 <__libc_enable_asynccancel>
0xb7d6243f <x+47>: mov %eax,%esi
0xb7d62441 <x+49>: mov %ebx,%edx
0xb7d62443 <x+51>: mov $0x66,%eax
0xb7d62448 <x+56>: mov $0xc,%ebx
0xb7d6244d <x+61>: lea 0x8(%esp),%ecx
0xb7d62451 <x+65>: call *%gs:0x10
0xb7d62458 <x+72>: mov %edx,%ebx
0xb7d6245a <x+74>: xchg %eax,%esi
0xb7d6245b <x+75>: call 0xb7d6dd90 <__libc_disable_asynccancel>
0xb7d62460 <x+80>: mov %esi,%eax
0xb7d62462 <x+82>: pop %esi
0xb7d62463 <x+83>: cmp $0xffffff83,%eax
0xb7d62466 <x+86>: jae 0xb7d62469 <x+89>
0xb7d62468 <x+88>: ret
0xb7d62469 <x+89>: call 0xb7d998f8 <__i686.get_pc_thunk.cx>
0xb7d6246e <x+94>: add $0x61b86,%ecx
0xb7d62474 <x+100>: mov 0xffffff2c(%ecx),%ecx
0xb7d6247a <x+106>: xor %edx,%edx
0xb7d6247c <x+108>: sub %eax,%edx
0xb7d6247e <x+110>: mov %edx,%gs:(%ecx)
0xb7d62481 <x+113>: or $0xffffffff,%eax
0xb7d62484 <x+116>: jmp 0xb7d62438 <x+40>
Any clues?
--
http://www.PowerDNS.com Open source, database driven DNS Software
http://netherlabs.nl Open and Closed source services
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists