[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1273267137.2325.31.camel@edumazet-laptop>
Date: Fri, 07 May 2010 23:18:57 +0200
From: Eric Dumazet <eric.dumazet@...il.com>
To: Bhaskar Dutta <bhaskie@...il.com>
Cc: Stephen Hemminger <shemminger@...tta.com>,
Ben Hutchings <bhutchings@...arflare.com>,
netdev@...r.kernel.org, David Miller <davem@...emloft.net>
Subject: Re: TCP-MD5 checksum failure on x86_64 SMP
Le vendredi 07 mai 2010 à 17:44 +0200, Eric Dumazet a écrit :
> Le vendredi 07 mai 2010 à 17:18 +0200, Eric Dumazet a écrit :
> > OK, I found the second problem.
> >
> > if/when IP route cache is invalidated, ip_queue_xmit() has to refetch a
> > route and calls sk_setup_caps(sk, &rt->u.dst), destroying the
> >
> > sk->sk_route_caps &= ~NETIF_F_GSO_MASK
> >
> > that MD5 desesperatly try to make all over its way (from
> > tcp_transmit_skb() for example)
> >
> > So we send few bad packets, and everything is fine when
> > tcp_transmit_skb() is called again.
> >
> > You get many errors on remote peer if you do
> >
> > ip route flush cache
> >
Patch solves the problem for me. I tested it with 200 MD5 sockets
established between two 16 cpus machine, with a multiqueue NIC. Trafic
of 100.000 pps per second, and "ip route flush cache" every minute on
both machines. After five hours, not a single frame had a bad hash
value.
Here is the official submission.
[PATCH] net: Introduce sk_route_nocaps
TCP-MD5 sessions have intermittent failures, when route cache is
invalidated. ip_queue_xmit() has to find a new route, calls
sk_setup_caps(sk, &rt->u.dst), destroying the
sk->sk_route_caps &= ~NETIF_F_GSO_MASK
that MD5 desperately try to make all over its way (from
tcp_transmit_skb() for example)
So we send few bad packets, and everything is fine when
tcp_transmit_skb() is called again for this socket.
Since ip_queue_xmit() is at a lower level than TCP-MD5, I chose to use a
socket field, sk_route_nocaps, containing bits to mask on sk_route_caps.
Reported-by: Bhaskar Dutta <bhaskie@...il.com>
Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
---
include/net/sock.h | 8 ++++++++
net/core/sock.c | 1 +
net/ipv4/tcp_ipv4.c | 6 +++---
net/ipv4/tcp_output.c | 2 +-
net/ipv6/tcp_ipv6.c | 4 ++--
5 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 1ad6435..abfadfe 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -177,6 +177,7 @@ struct sock_common {
* %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
* @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
+ * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
* @sk_gso_max_size: Maximum GSO segment size to build
* @sk_lingertime: %SO_LINGER l_linger setting
@@ -276,6 +277,7 @@ struct sock {
int sk_forward_alloc;
gfp_t sk_allocation;
int sk_route_caps;
+ int sk_route_nocaps;
int sk_gso_type;
unsigned int sk_gso_max_size;
int sk_rcvlowat;
@@ -1257,6 +1259,12 @@ static inline int sk_can_gso(const struct sock *sk)
extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
+static inline void sk_nocaps_add(struct sock *sk, int flags)
+{
+ sk->sk_route_nocaps |= flags;
+ sk->sk_route_caps &= ~flags;
+}
+
static inline int skb_copy_to_page(struct sock *sk, char __user *from,
struct sk_buff *skb, struct page *page,
int off, int copy)
diff --git a/net/core/sock.c b/net/core/sock.c
index c5812bb..5056a6a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1227,6 +1227,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps = dst->dev->features;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+ sk->sk_route_caps &= ~sk->sk_route_nocaps;
if (sk_can_gso(sk)) {
if (dst->header_len) {
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3c23e70..f1a1dd9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -894,7 +894,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
kfree(newkey);
return -ENOMEM;
}
- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
}
if (tcp_alloc_md5sig_pool(sk) == NULL) {
kfree(newkey);
@@ -1024,7 +1024,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
return -EINVAL;
tp->md5sig_info = p;
- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
}
newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
@@ -1465,7 +1465,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
if (newkey != NULL)
tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
newkey, key->keylen);
- newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
}
#endif
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0dda86e..0193a39 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -872,7 +872,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
if (md5) {
- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
tp->af_specific->calc_md5_hash(opts.hash_location,
md5, sk, NULL, skb);
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 075f540..bf34893 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -600,7 +600,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
kfree(newkey);
return -ENOMEM;
}
- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
}
if (tcp_alloc_md5sig_pool(sk) == NULL) {
kfree(newkey);
@@ -737,7 +737,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
return -ENOMEM;
tp->md5sig_info = p;
- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+ sk_nocaps_add(sk, NETIF_F_GSO_MASK);
}
newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists