[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4778FFE2.9090008@cosmosbay.com>
Date: Mon, 31 Dec 2007 15:42:42 +0100
From: Eric Dumazet <dada1@...mosbay.com>
To: David Miller <davem@...emloft.net>
CC: haoki@...hat.com, herbert@...dor.apana.org.au,
netdev@...r.kernel.org, tyasui@...hat.com, mhiramat@...hat.com,
satoshi.oshima.fk@...achi.com, billfink@...dspring.com,
andi@...stfloor.org, johnpol@....mipt.ru,
shemminger@...ux-foundation.org, yoshfuji@...ux-ipv6.org,
yumiko.sugita.yf@...achi.com
Subject: Re: [PATCH 1/3] [UDP]: add udp_mem, udp_rmem_min and udp_wmem_min
David Miller a écrit :
> From: Eric Dumazet <dada1@...mosbay.com>
> Date: Mon, 31 Dec 2007 09:54:32 +0100
>
>> Maybe I read the patch incorrectly, or we could add some new sysctl so that
>> we not try to uncharge memory if a socket 'forward_alloc' is beyond a given
>> limit (say 2 pages), so that number of atomic_inc/dec on udp_memory_allocated
>> (or tcp_memory_allocated) is reduced.
>
> This is what we should be striving for, using forward_alloc
> as much as possible as a "cache" to avoid the atomics on
> the global var as much as possible.
Thank you for this confirmation, David. I understand now that TCP doesn't
currently satisfy the contract.
For example, tcp_delack_timer() calls sk_mem_reclaim().
So on a machine with a lot of mostly idle sockets (but all sockets are doing
some traffic, say one message per minute per socket), we can see:
$ grep TCP /proc/net/sockstat
TCP: inuse 1083667 orphan 8840 tw 6646 alloc 1083809 mem 262305
$ cat /proc/sys/net/ipv4/tcp_mem
2000000 3000000 4000000
so an average of 1/4 page is 'allocated' per socket :(
On this machine, we constantly change tcp_memory_allocated, even though we are
always under the tcp_mem[0] limit.
Maybe we need to introduce some mechanism to let sk_forward_alloc stay between
0 and SK_MEM_QUANTUM (inclusive).
/*
 * Proposed helper: call __sk_mem_reclaim() on @sk only when its
 * sk_forward_alloc is strictly greater than SK_MEM_QUANTUM; otherwise
 * the socket is left untouched.
 */
static inline void sk_mem_reclaim_overpage(struct sock *sk)
{
	if (sk->sk_forward_alloc <= SK_MEM_QUANTUM)
		return;

	__sk_mem_reclaim(sk);
}
and use sk_mem_reclaim_overpage() instead of sk_mem_reclaim() in
tcp_delack_timer() ?
Thank you
Small program output :
$ gcc -o prog prog.c ; ./prog
TCP: inuse 1035 orphan 0 tw 271 alloc 1203 mem 16
TCP: inuse 1035 orphan 0 tw 271 alloc 1203 mem 4016
TCP: inuse 1034 orphan 0 tw 272 alloc 1202 mem 3015
TCP: inuse 1034 orphan 0 tw 272 alloc 1202 mem 3016
TCP: inuse 1034 orphan 0 tw 272 alloc 1202 mem 3516
TCP: inuse 1034 orphan 0 tw 272 alloc 1202 mem 14 <OOPS>
$ cat prog.c
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <string.h>
#include <stdlib.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
#include <errno.h>
#include <asm/ioctls.h>
#include <stdio.h>
/* Target number of socket descriptors; defaults to 1000, overridden by -n in main(). */
int SOCK_COUNT = 1000;
/* Descriptor table: allocated in main(), filled by open_sockets(). */
int *sockets_fd_tab;
/* Number of descriptors currently stored in sockets_fd_tab. */
unsigned int count;
/*
 * Open sockets and append their descriptors to sockets_fd_tab, advancing
 * the global count.
 *
 *  - domain == AF_UNIX: each iteration stores both ends of a socketpair().
 *  - otherwise AF_INET sockets of @type are created and made non-blocking:
 *      - type == SOCK_STREAM: each client socket is connected to a local
 *        listening socket (set up here when domain is AF_INET, bound to a
 *        kernel-chosen port) and stored together with the accepted peer;
 *      - other types: only the single created socket is stored.
 *
 * The loop ends when fewer than two free slots remain below SOCK_COUNT
 * (sockets are normally stored in pairs, so an odd or too-small SOCK_COUNT
 * can never overrun the table), or when a socket cannot be created or
 * accepted.  The listening socket is closed before returning.
 */
static void open_sockets(int domain, int type)
{
	const unsigned int limit = SOCK_COUNT > 0 ? (unsigned int)SOCK_COUNT : 0;
	int fdlisten = -1, on = 1;
	socklen_t addrlen;
	struct sockaddr_in host, peer;

	if (sockets_fd_tab == NULL)
		return;

	/* Always defined, even when no listener is set up below. */
	memset(&host, 0, sizeof(host));

	if (domain == AF_INET && type == SOCK_STREAM) {
		fdlisten = socket(AF_INET, type, 0);
		if (fdlisten == -1) {
			perror("socket");
			return;
		}
		setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
		host.sin_family = AF_INET;
		addrlen = sizeof(host);
		/* getsockname() retrieves the port the kernel picked in bind(). */
		if (bind(fdlisten, (struct sockaddr *)&host, sizeof(host)) == -1 ||
		    getsockname(fdlisten, (struct sockaddr *)&host, &addrlen) == -1 ||
		    listen(fdlisten, 5) == -1) {
			perror("listening socket setup");
			close(fdlisten);
			return;
		}
	}

	while (count + 2 <= limit) {
		int vec[2] = { -1, -1 };

		if (domain == AF_UNIX) {
			if (socketpair(AF_UNIX, type, 0, vec) == -1)
				break;
		} else {
			vec[0] = socket(AF_INET, type, 0);
			if (vec[0] == -1)
				break;
			ioctl(vec[0], FIONBIO, &on);
			if (type == SOCK_STREAM) {
				/*
				 * Non-blocking connect: its result is not
				 * checked; accept() below waits for the peer.
				 */
				connect(vec[0], (struct sockaddr *)&host,
					sizeof(host));
				addrlen = sizeof(peer);
				vec[1] = accept(fdlisten,
						(struct sockaddr *)&peer,
						&addrlen);
				if (vec[1] == -1) {
					close(vec[0]);
					break;
				}
			}
		}

		sockets_fd_tab[count++] = vec[0];
		/* Non-stream AF_INET sockets have no peer descriptor. */
		if (vec[1] != -1)
			sockets_fd_tab[count++] = vec[1];
	}

	if (fdlisten != -1)
		close(fdlisten);
}
/* Payload buffer used by fill_sockets(); only its first 100 bytes are sent. */
const char some_msg[1024] = "One dummy message";

/*
 * Send 100 bytes of some_msg on every descriptor in sockets_fd_tab, then
 * do a second full pass over the table.  send() results are ignored.
 */
static void fill_sockets()
{
	int pass;

	for (pass = 0; pass < 2; pass++) {
		unsigned int idx = 0;

		while (idx < count) {
			send(sockets_fd_tab[idx], some_msg, 100, 0);
			idx++;
		}
	}
}
/*
 * Issue one recv() of up to 4096 bytes on every descriptor stored in
 * sockets_fd_tab.  The received data and the recv() results are discarded.
 */
static void empty_sockets()
{
	char scratch[4096];
	unsigned int idx = 0;

	while (idx < count) {
		recv(sockets_fd_tab[idx], scratch, sizeof(scratch), 0);
		idx++;
	}
}
/* Print the TCP line(s) of /proc/net/sockstat by running grep through system(). */
static void dump_infos()
{
	static const char cmd[] = "grep TCP /proc/net/sockstat";

	system(cmd);
}
/*
 * Test driver: open SOCK_COUNT TCP sockets over loopback, then alternate
 * fill/empty phases, printing the TCP line of /proc/net/sockstat after
 * each step.
 *
 * Options:
 *   -n <count>  number of socket descriptors to open (positive integer).
 *
 * Returns 0 on success, 1 on an invalid -n argument or allocation failure.
 */
int main(int argc, char *argv[])
{
	int c;

	while ((c = getopt(argc, argv, "n:")) != -1) {
		if (c == 'n') {
			char *end;
			long val;

			/* strtol() instead of atoi(): reject garbage and out-of-range input. */
			errno = 0;
			val = strtol(optarg, &end, 10);
			if (errno != 0 || end == optarg || *end != '\0' ||
			    val < 1 || val != (int)val) {
				fprintf(stderr, "%s: invalid socket count '%s'\n",
					argv[0], optarg);
				return 1;
			}
			SOCK_COUNT = (int)val;
		}
	}

	sockets_fd_tab = calloc((size_t)SOCK_COUNT, sizeof *sockets_fd_tab);
	if (sockets_fd_tab == NULL) {
		perror("calloc");
		return 1;
	}

	open_sockets(AF_INET, SOCK_STREAM);
	dump_infos();
	fill_sockets();
	dump_infos();
	sleep(1); /* to see effect of delayed acks */
	dump_infos();
	empty_sockets();
	dump_infos();
	fill_sockets();
	dump_infos();
	empty_sockets();
	sleep(1); /* to see effect of delayed acks */
	dump_infos();
	return 0;
}
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists