lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <c656e270bec67c2d7c99c42249b8cc890bec22f9.camel@redhat.com>
Date: Tue, 28 Nov 2023 11:34:56 +0100
From: Paolo Abeni <pabeni@...hat.com>
To: Kuniyuki Iwashima <kuniyu@...zon.com>, "David S. Miller"
	 <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski
	 <kuba@...nel.org>
Cc: Simon Horman <horms@...nel.org>, Kuniyuki Iwashima <kuni1840@...il.com>,
  netdev@...r.kernel.org
Subject: Re: [PATCH v2 net-next 8/8] tcp: Factorise cookie-dependent fields
 initialisation in cookie_v[46]_check()

Hi,

On Fri, 2023-11-24 at 17:16 -0800, Kuniyuki Iwashima wrote:
> We will support arbitrary SYN Cookie with BPF, and then kfunc at
> TC will preallocate reqsk and initialise some fields that should
> not be overwritten later by cookie_v[46]_check().
> 
> To simplify the flow in cookie_v[46]_check(), we move such fields'
> initialisation to cookie_tcp_reqsk_alloc() and factorise non-BPF
> SYN Cookie handling into cookie_tcp_check(), where we validate the
> cookie and allocate reqsk, as done by kfunc later.
> 
> Note that we set ireq->ecn_ok in two steps, the latter of which will
> be shared by the BPF case.  As cookie_ecn_ok() is a one-liner, it's
> now inlined.
> 
> Signed-off-by: Kuniyuki Iwashima <kuniyu@...zon.com>
> Reviewed-by: Simon Horman <horms@...nel.org>
> ---
>  include/net/tcp.h     |  13 ++++--
>  net/ipv4/syncookies.c | 106 +++++++++++++++++++++++-------------------
>  net/ipv6/syncookies.c |  61 ++++++++++++------------
>  3 files changed, 99 insertions(+), 81 deletions(-)
> 
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index d4d0e9763175..973555cb1d3f 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -494,7 +494,10 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
>  int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th);
>  struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
>  struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
> -					    struct sock *sk, struct sk_buff *skb);
> +					    struct sock *sk, struct sk_buff *skb,
> +					    struct tcp_options_received *tcp_opt,
> +					    int mss, u32 tsoff);
> +
>  #ifdef CONFIG_SYN_COOKIES
>  
>  /* Syncookies use a monotonic timer which increments every 60 seconds.
> @@ -580,8 +583,12 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
>  u64 cookie_init_timestamp(struct request_sock *req, u64 now);
>  bool cookie_timestamp_decode(const struct net *net,
>  			     struct tcp_options_received *opt);
> -bool cookie_ecn_ok(const struct tcp_options_received *opt,
> -		   const struct net *net, const struct dst_entry *dst);
> +
> +static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry *dst)
> +{
> +	return READ_ONCE(net->ipv4.sysctl_tcp_ecn) ||
> +		dst_feature(dst, RTAX_FEATURE_ECN);
> +}
>  
>  /* From net/ipv6/syncookies.c */
>  int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th);
> diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> index f4bcd4822fe0..5be12f186c26 100644
> --- a/net/ipv4/syncookies.c
> +++ b/net/ipv4/syncookies.c
> @@ -270,21 +270,6 @@ bool cookie_timestamp_decode(const struct net *net,
>  }
>  EXPORT_SYMBOL(cookie_timestamp_decode);
>  
> -bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
> -		   const struct net *net, const struct dst_entry *dst)
> -{
> -	bool ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN;
> -
> -	if (!ecn_ok)
> -		return false;
> -
> -	if (READ_ONCE(net->ipv4.sysctl_tcp_ecn))
> -		return true;
> -
> -	return dst_feature(dst, RTAX_FEATURE_ECN);
> -}
> -EXPORT_SYMBOL(cookie_ecn_ok);
> -
>  static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
>  				 struct request_sock *req)
>  {
> @@ -320,8 +305,12 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
>  }
>  
>  struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
> -					    struct sock *sk, struct sk_buff *skb)
> +					    struct sock *sk, struct sk_buff *skb,
> +					    struct tcp_options_received *tcp_opt,
> +					    int mss, u32 tsoff)
>  {
> +	struct inet_request_sock *ireq;
> +	struct tcp_request_sock *treq;
>  	struct request_sock *req;
>  
>  	if (sk_is_mptcp(sk))
> @@ -337,40 +326,36 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
>  		return NULL;
>  	}
>  
> +	ireq = inet_rsk(req);
> +	treq = tcp_rsk(req);
> +
> +	req->mss = mss;
> +	req->ts_recent = tcp_opt->saw_tstamp ? tcp_opt->rcv_tsval : 0;
> +
> +	ireq->snd_wscale = tcp_opt->snd_wscale;
> +	ireq->tstamp_ok = tcp_opt->saw_tstamp;
> +	ireq->sack_ok = tcp_opt->sack_ok;
> +	ireq->wscale_ok = tcp_opt->wscale_ok;
> +	ireq->ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN;
> +
> +	treq->ts_off = tsoff;
> +
>  	return req;
>  }
>  EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc);
>  
> -/* On input, sk is a listener.
> - * Output is listener if incoming packet would not create a child
> - *           NULL if memory could not be allocated.
> - */
> -struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
> +static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
> +					     struct sk_buff *skb)
>  {
> -	struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
> -	const struct tcphdr *th = tcp_hdr(skb);
>  	struct tcp_options_received tcp_opt;
> -	struct tcp_sock *tp = tcp_sk(sk);
> -	struct inet_request_sock *ireq;
> -	struct net *net = sock_net(sk);
> -	struct tcp_request_sock *treq;
> -	struct request_sock *req;
> -	struct sock *ret = sk;
> -	int full_space, mss;
> -	struct flowi4 fl4;
> -	struct rtable *rt;
> -	__u8 rcv_wscale;
>  	u32 tsoff = 0;
> -
> -	if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
> -	    !th->ack || th->rst)
> -		goto out;
> +	int mss;
>  
>  	if (tcp_synq_no_recent_overflow(sk))
>  		goto out;
>  
> -	mss = __cookie_v4_check(ip_hdr(skb), th);
> -	if (mss == 0) {
> +	mss = __cookie_v4_check(ip_hdr(skb), tcp_hdr(skb));
> +	if (!mss) {
>  		__NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
>  		goto out;
>  	}
> @@ -391,21 +376,44 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
>  	if (!cookie_timestamp_decode(net, &tcp_opt))
>  		goto out;
>  
> -	req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb);
> +	return cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb,
> +				      &tcp_opt, mss, tsoff);
> +out:
> +	return ERR_PTR(-EINVAL);
> +}
> +
> +/* On input, sk is a listener.
> + * Output is listener if incoming packet would not create a child
> + *           NULL if memory could not be allocated.
> + */
> +struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
> +{
> +	struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
> +	const struct tcphdr *th = tcp_hdr(skb);
> +	struct tcp_sock *tp = tcp_sk(sk);
> +	struct inet_request_sock *ireq;
> +	struct net *net = sock_net(sk);
> +	struct request_sock *req;
> +	struct sock *ret = sk;
> +	struct flowi4 fl4;
> +	struct rtable *rt;
> +	__u8 rcv_wscale;
> +	int full_space;
> +
> +	if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
> +	    !th->ack || th->rst)
> +		goto out;
> +
> +	req = cookie_tcp_check(net, sk, skb);
> +	if (IS_ERR(req))
> +		goto out;
>  	if (!req)
>  		goto out_drop;
>  
>  	ireq = inet_rsk(req);
> -	treq = tcp_rsk(req);
> -	treq->ts_off		= tsoff;
> -	req->mss		= mss;
> +
>  	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
>  	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
> -	ireq->snd_wscale	= tcp_opt.snd_wscale;
> -	ireq->sack_ok		= tcp_opt.sack_ok;
> -	ireq->wscale_ok		= tcp_opt.wscale_ok;
> -	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
> -	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
>  
>  	/* We throwed the options of the initial SYN away, so we hope
>  	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
> @@ -447,7 +455,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
>  				  dst_metric(&rt->dst, RTAX_INITRWND));
>  
>  	ireq->rcv_wscale  = rcv_wscale;
> -	ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, &rt->dst);
> +	ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst);
>  
>  	ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
>  	/* ip_queue_xmit() depends on our flow being setup
> diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
> index e0a9220d1536..c8d2ca27220c 100644
> --- a/net/ipv6/syncookies.c
> +++ b/net/ipv6/syncookies.c
> @@ -127,31 +127,18 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th)
>  }
>  EXPORT_SYMBOL_GPL(__cookie_v6_check);
>  
> -struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
> +static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
> +					     struct sk_buff *skb)
>  {
> -	const struct tcphdr *th = tcp_hdr(skb);
> -	struct ipv6_pinfo *np = inet6_sk(sk);
>  	struct tcp_options_received tcp_opt;
> -	struct tcp_sock *tp = tcp_sk(sk);
> -	struct inet_request_sock *ireq;
> -	struct net *net = sock_net(sk);
> -	struct tcp_request_sock *treq;
> -	struct request_sock *req;
> -	struct dst_entry *dst;
> -	struct sock *ret = sk;
> -	int full_space, mss;
> -	__u8 rcv_wscale;
>  	u32 tsoff = 0;
> -
> -	if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
> -	    !th->ack || th->rst)
> -		goto out;
> +	int mss;
>  
>  	if (tcp_synq_no_recent_overflow(sk))
>  		goto out;
>  
> -	mss = __cookie_v6_check(ipv6_hdr(skb), th);
> -	if (mss == 0) {
> +	mss = __cookie_v6_check(ipv6_hdr(skb), tcp_hdr(skb));
> +	if (!mss) {
>  		__NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
>  		goto out;
>  	}
> @@ -172,14 +159,37 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
>  	if (!cookie_timestamp_decode(net, &tcp_opt))
>  		goto out;
>  
> -	req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb);
> +	return cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb,
> +				      &tcp_opt, mss, tsoff);
> +out:
> +	return ERR_PTR(-EINVAL);
> +}
> +
> +struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
> +{
> +	const struct tcphdr *th = tcp_hdr(skb);
> +	struct ipv6_pinfo *np = inet6_sk(sk);
> +	struct tcp_sock *tp = tcp_sk(sk);
> +	struct inet_request_sock *ireq;
> +	struct net *net = sock_net(sk);
> +	struct request_sock *req;
> +	struct dst_entry *dst;
> +	struct sock *ret = sk;
> +	__u8 rcv_wscale;
> +	int full_space;
> +
> +	if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
> +	    !th->ack || th->rst)
> +		goto out;
> +
> +	req = cookie_tcp_check(net, sk, skb);
> +	if (IS_ERR(req))
> +		goto out;
>  	if (!req)
>  		goto out_drop;
>  
>  	ireq = inet_rsk(req);
> -	treq = tcp_rsk(req);
>  
> -	req->mss = mss;
>  	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
>  	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
>  
> @@ -198,13 +208,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
>  	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
>  		ireq->ir_iif = tcp_v6_iif(skb);
>  
> -	ireq->snd_wscale	= tcp_opt.snd_wscale;
> -	ireq->sack_ok		= tcp_opt.sack_ok;
> -	ireq->wscale_ok		= tcp_opt.wscale_ok;
> -	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
> -	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
> -	treq->ts_off = tsoff;
> -
>  	tcp_ao_syncookie(sk, skb, req, AF_INET6);
>  
>  	/*
> @@ -245,7 +248,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
>  				  dst_metric(dst, RTAX_INITRWND));
>  
>  	ireq->rcv_wscale = rcv_wscale;
> -	ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, dst);
> +	ireq->ecn_ok &= cookie_ecn_ok(net, dst);

Nice cleanup! IMHO it looks very good, but I think it deserves Eric's
explicit ack ;)

The only question I have (out of sheer curiosity, no change requested
here) is:

have you considered leaving the 'ecn_ok' initialization unchanged
(it looks a little cleaner as a single-step init)? Is the split there for
the sake of the later patches? (I haven't looked at them yet).

Cheers,

Paolo


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ