[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20091210210920.6820.12664.stgit@paris.rdu.redhat.com>
Date: Thu, 10 Dec 2009 16:09:20 -0500
From: Eric Paris <eparis@...hat.com>
To: netdev@...r.kernel.org
Cc: eparis@...hat.com
Subject: [PATCH] net: export the number of times the recv queue was full
We got a request in which a customer was trying to determine how often their
receive queue was full and thus they were sending a zero window back to the
other side. By the time they noticed the slowdowns, all of their receive queues
were empty again and they couldn't tell which socket had been the problem.
Knowing how often each socket advertised a zero window also lets them find the
sockets whose recv queue size needs to be raised, rather than raising it for
all sockets across the box. This patch exports that
information via /proc/net/tcp.
# cat /proc/net/tcp
sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode
[snip]
10: D7E70B0A:0016 E4E70B0A:E0BD 01 00000000:00000000 02:000AF288 00000000 0 0 12598 4 ffff88003e4c1e00 20 3 17 5 -1 0
11: 0100007F:2710 0100007F:B91D 01 00000000:000113DA 02:000AEFA6 00000000 0 0 12592 2 ffff88003e7b2800 20 11 0 2 -1 8
Signed-off-by: Eric Paris <eparis@...hat.com>
---
Documentation/networking/proc_net_tcp.txt | 4 +++-
include/linux/tcp.h | 3 +++
net/ipv4/tcp.c | 3 +++
net/ipv4/tcp_ipv4.c | 5 ++++-
net/ipv4/tcp_output.c | 4 +++-
net/ipv6/tcp_ipv6.c | 10 +++++++---
6 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/Documentation/networking/proc_net_tcp.txt b/Documentation/networking/proc_net_tcp.txt
index 4a79209..03375f4 100644
--- a/Documentation/networking/proc_net_tcp.txt
+++ b/Documentation/networking/proc_net_tcp.txt
@@ -24,7 +24,9 @@ up into 3 parts because of the length of the line):
| |----------------------> receive-queue
|-------------------------------> transmit-queue
- 1000 0 54165785 4 cd1e6040 25 4 27 3 -1
+ 1000 0 54165785 4 cd1e6040 25 4 27 3 -1 12
+ | | | | | | | | | | |--> number of times socket
+ | | | | | | | | | | replied with zero window
| | | | | | | | | |--> slow start size threshold,
| | | | | | | | | or -1 if the threshold
| | | | | | | | | is >= 0xFFFF
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7fee8a4..b50627d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -163,6 +163,8 @@ struct tcp_info {
__u32 tcpi_rcv_space;
__u32 tcpi_total_retrans;
+
+ __u32 tcpi_rcv_wnd_zero_cnt;
};
/* for TCP_MD5SIG socket option */
@@ -376,6 +378,7 @@ struct tcp_sock {
u32 rcv_wnd; /* Current receiver window */
u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
u32 pushed_seq; /* Last pushed seq, required to talk to windows */
+ u32 rcv_wnd_zero_cnt;
u32 lost_out; /* Lost packets */
u32 sacked_out; /* SACK'd packets */
u32 fackets_out; /* FACK'd packets */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b0a26bb..3fa846e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2061,6 +2061,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->snd_cwnd_cnt = 0;
tp->bytes_acked = 0;
tp->window_clamp = 0;
+ tp->rcv_wnd_zero_cnt = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
@@ -2432,6 +2433,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_space = tp->rcvq_space.space;
info->tcpi_total_retrans = tp->total_retrans;
+
+ info->tcpi_rcv_wnd_zero_cnt = tp->rcv_wnd_zero_cnt;
}
EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 15e9603..d61ecb8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1885,6 +1885,8 @@ static int tcp_v4_init_sock(struct sock *sk)
* cookie_in_always, cookie_out_never,
* s_data_constant, s_data_in, s_data_out
*/
+ tp->rcv_wnd_zero_cnt = 0;
+
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -2343,7 +2345,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
- "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
+ "%08X %5d %8d %lu %d %p %lu %lu %u %u %d %u%n",
i, src, srcp, dest, destp, sk->sk_state,
tp->write_seq - tp->snd_una,
rx_queue,
@@ -2359,6 +2361,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
+ tp->rcv_wnd_zero_cnt,
len);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 93316a9..337676d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -277,8 +277,10 @@ static u16 tcp_select_window(struct sock *sk)
new_win >>= tp->rx_opt.rcv_wscale;
/* If we advertise zero window, disable fast path. */
- if (new_win == 0)
+ if (new_win == 0) {
+ tp->rcv_wnd_zero_cnt++;
tp->pred_flags = 0;
+ }
return new_win;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ee9cf62..8f6c992 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1925,6 +1925,8 @@ static int tcp_v6_init_sock(struct sock *sk)
* cookie_in_always, cookie_out_never,
* s_data_constant, s_data_in, s_data_out
*/
+ tp->rcv_wnd_zero_cnt = 0;
+
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -2010,8 +2012,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
}
seq_printf(seq,
- "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n",
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X %02X "
+ "%08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u "
+ "%d %u\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -2031,7 +2034,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
+ tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
+ tp->rcv_wnd_zero_cnt
);
}
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists