lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1450718113.8474.132.camel@edumazet-glaptop2.roam.corp.google.com>
Date:	Mon, 21 Dec 2015 12:15:13 -0500
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	Florian Westphal <fw@...len.de>
Cc:	netdev@...r.kernel.org
Subject: Re: [PATCH -next] tcp: honour SO_BINDTODEVICE for TW_RST case too

On Mon, 2015-12-21 at 17:20 +0100, Florian Westphal wrote:
> Hannes points out that when we generate tcp reset for timewait sockets we
> pretend we found no socket and pass NULL sk to tcp_vX_send_reset().
> 
> Make it cope with inet tw sockets and then provide tw sk so RST appears on
> correct interface.
> 
> Packetdrill test case:
> // want default route to be used, we rely on BINDTODEVICE
> `ip route del 192.0.2.0/24 via 192.168.0.2 dev tun0`
> 
> 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
> 0.001 setsockopt(3, SOL_SOCKET, SO_BINDTODEVICE, "tun0", 4) = 0
> 0.100...0.200 connect(3, ..., ...) = 0
> 
> 0.100 > S 0:0(0) <mss 1460,sackOK,nop,nop>
> 0.200 < S. 0:0(0) ack 1 win 32792 <mss 1460,sackOK,nop,nop>
> 0.200 > . 1:1(0) ack 1
> 
> 0.210 close(3) = 0
> 
> 0.210 > F. 1:1(0) ack 1 win 29200
> 0.300 < . 1:1(0) ack 2 win 46
> 
> // more data while in FIN_WAIT2, expect RST
> 1.300 < P. 1:1001(1000) ack 1 win 46
> 
> // fails without this change -- default route is used
> 1.301 > R 1:1(0) win 0
> 
> Reported-by: Hannes Frederic Sowa <hannes@...essinduktion.org>
> Signed-off-by: Florian Westphal <fw@...len.de>
> ---
>  net/ipv4/tcp_ipv4.c      | 31 ++++++++++++++++++++++---------
>  net/ipv4/tcp_minisocks.c |  7 ++-----
>  net/ipv6/tcp_ipv6.c      | 15 +++++++++++----
>  3 files changed, 35 insertions(+), 18 deletions(-)
> 
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 46e92fb..24ba2e1 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -587,13 +587,14 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
>  	} rep;
>  	struct ip_reply_arg arg;
>  #ifdef CONFIG_TCP_MD5SIG
> -	struct tcp_md5sig_key *key;
> +	struct tcp_md5sig_key *key = NULL;
>  	const __u8 *hash_location = NULL;
>  	unsigned char newhash[16];
>  	int genhash;
>  	struct sock *sk1 = NULL;
>  #endif
>  	struct net *net;
> +	bool have_full_sk;
>  
>  	/* Never send a reset in response to a reset. */
>  	if (th->rst)
> @@ -624,10 +625,14 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
>  	arg.iov[0].iov_base = (unsigned char *)&rep;
>  	arg.iov[0].iov_len  = sizeof(rep.th);
>  
> -	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
> +	have_full_sk = sk && sk_fullsock(sk);
> +	net = have_full_sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);

But the net pointer can be derived from a timewait socket in the same way,
so I am not sure why you changed this part... This makes your patch look
more complicated than needed.

>  #ifdef CONFIG_TCP_MD5SIG
>  	hash_location = tcp_parse_md5sig_option(th);
> -	if (!sk && hash_location) {
> +	if (have_full_sk) {
> +		key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
> +					&ip_hdr(skb)->saddr, AF_INET);
> +	} else if (hash_location) {
>  		/*
>  		 * active side is lost. Try to find listening socket through
>  		 * source port, and then find md5 key through listening socket.
> @@ -651,10 +656,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
>  		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
>  		if (genhash || memcmp(hash_location, newhash, 16) != 0)
>  			goto release_sk1;
> -	} else {
> -		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
> -					     &ip_hdr(skb)->saddr,
> -					     AF_INET) : NULL;
>  	}
>  
>  	if (key) {
> @@ -675,7 +676,14 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
>  				      ip_hdr(skb)->saddr, /* XXX */
>  				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
>  	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
> -	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
> +	arg.flags = 0;
> +	if (have_full_sk) {
> +		if (inet_sk(sk)->transparent)
> +			arg.flags = IP_REPLY_ARG_NOSRCCHECK;
> +	} else if (sk && inet_twsk(sk)->tw_transparent) {
> +		arg.flags = IP_REPLY_ARG_NOSRCCHECK;
> +	}
> +

Maybe a helper to retrieve the transparent status from a generic socket
(be it a full, timewait or request sock) would help.

This could be submitted as a separate patch to ease review.


>  	/* When socket is gone, all binding information is lost.
>  	 * routing might fail in this case. No choice here, if we choose to force
>  	 * input interface, we will misroute in case of asymmetric route.
> @@ -683,6 +691,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
>  	if (sk)
>  		arg.bound_dev_if = sk->sk_bound_dev_if;
>  
> +	BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
> +		     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
> +
>  	arg.tos = ip_hdr(skb)->tos;
>  	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
>  			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
> @@ -1706,7 +1717,9 @@ do_time_wait:
>  		tcp_v4_timewait_ack(sk, skb);
>  		break;
>  	case TCP_TW_RST:
> -		goto no_tcp_socket;
> +		tcp_v4_send_reset(sk, skb);
> +		inet_twsk_deschedule_put(inet_twsk(sk));
> +		goto discard_it;
>  	case TCP_TW_SUCCESS:;
>  	}
>  	goto discard_it;
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index ac6b196..75632a9 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -131,7 +131,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
>  			goto kill;
>  
>  		if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
> -			goto kill_with_rst;
> +			return TCP_TW_RST;
>  
>  		/* Dup ACK? */
>  		if (!th->ack ||
> @@ -145,11 +145,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
>  		 * reset.
>  		 */
>  		if (!th->fin ||
> -		    TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
> -kill_with_rst:
> -			inet_twsk_deschedule_put(tw);



> +		    TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1)
>  			return TCP_TW_RST;
> -		}
>  
>  		/* FIN arrived, enter true time-wait state. */
>  		tw->tw_substate	  = TCP_TIME_WAIT;
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index f03d2b0..2637b61 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -841,6 +841,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
>  	int genhash;
>  	struct sock *sk1 = NULL;
>  #endif
> +	bool have_full_sk;
>  	int oif;
>  
>  	if (th->rst)
> @@ -852,9 +853,12 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
>  	if (!sk && !ipv6_unicast_destination(skb))
>  		return;
>  
> +	have_full_sk = sk && sk_fullsock(sk);
>  #ifdef CONFIG_TCP_MD5SIG
>  	hash_location = tcp_parse_md5sig_option(th);
> -	if (!sk && hash_location) {
> +	if (have_full_sk) {
> +		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
> +	} else if (hash_location) {
>  		/*
>  		 * active side is lost. Try to find listening socket through
>  		 * source port, and then find md5 key through listening socket.
> @@ -877,8 +881,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
>  		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
>  		if (genhash || memcmp(hash_location, newhash, 16) != 0)
>  			goto release_sk1;
> -	} else {
> -		key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
>  	}
>  #endif
>  
> @@ -889,6 +891,9 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
>  			  (th->doff << 2);
>  
>  	oif = sk ? sk->sk_bound_dev_if : 0;
> +	if (!have_full_sk)
> +		sk = NULL;
> +

I have no idea why you need to set sk to NULL here.
This seems not related to this patch.

I found this hard to review...
It seems you have multiple logical changes?

Splitting into at least 2 patches would be nice.

>  	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
>  
>  #ifdef CONFIG_TCP_MD5SIG
> @@ -1516,7 +1521,9 @@ do_time_wait:
>  		break;
>  	case TCP_TW_RST:
>  		tcp_v6_restore_cb(skb);
> -		goto no_tcp_socket;
> +		tcp_v6_send_reset(sk, skb);
> +		inet_twsk_deschedule_put(inet_twsk(sk));
> +		goto discard_it;
>  	case TCP_TW_SUCCESS:
>  		;
>  	}


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ