[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <c656e270bec67c2d7c99c42249b8cc890bec22f9.camel@redhat.com>
Date: Tue, 28 Nov 2023 11:34:56 +0100
From: Paolo Abeni <pabeni@...hat.com>
To: Kuniyuki Iwashima <kuniyu@...zon.com>, "David S. Miller"
<davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski
<kuba@...nel.org>
Cc: Simon Horman <horms@...nel.org>, Kuniyuki Iwashima <kuni1840@...il.com>,
netdev@...r.kernel.org
Subject: Re: [PATCH v2 net-next 8/8] tcp: Factorise cookie-dependent fields
initialisation in cookie_v[46]_check()
Hi,
On Fri, 2023-11-24 at 17:16 -0800, Kuniyuki Iwashima wrote:
> We will support arbitrary SYN Cookie with BPF, and then kfunc at
> TC will preallocate reqsk and initialise some fields that should
> not be overwritten later by cookie_v[46]_check().
>
> To simplify the flow in cookie_v[46]_check(), we move such fields'
> initialisation to cookie_tcp_reqsk_alloc() and factorise non-BPF
> SYN Cookie handling into cookie_tcp_check(), where we validate the
> cookie and allocate reqsk, as done by kfunc later.
>
> Note that we set ireq->ecn_ok in two steps, the latter of which will
> be shared by the BPF case. As cookie_ecn_ok() is a one-liner, now
> it's inlined.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@...zon.com>
> Reviewed-by: Simon Horman <horms@...nel.org>
> ---
> include/net/tcp.h | 13 ++++--
> net/ipv4/syncookies.c | 106 +++++++++++++++++++++++-------------------
> net/ipv6/syncookies.c | 61 ++++++++++++------------
> 3 files changed, 99 insertions(+), 81 deletions(-)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index d4d0e9763175..973555cb1d3f 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -494,7 +494,10 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
> int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th);
> struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
> struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
> - struct sock *sk, struct sk_buff *skb);
> + struct sock *sk, struct sk_buff *skb,
> + struct tcp_options_received *tcp_opt,
> + int mss, u32 tsoff);
> +
> #ifdef CONFIG_SYN_COOKIES
>
> /* Syncookies use a monotonic timer which increments every 60 seconds.
> @@ -580,8 +583,12 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
> u64 cookie_init_timestamp(struct request_sock *req, u64 now);
> bool cookie_timestamp_decode(const struct net *net,
> struct tcp_options_received *opt);
> -bool cookie_ecn_ok(const struct tcp_options_received *opt,
> - const struct net *net, const struct dst_entry *dst);
> +
> +static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry *dst)
> +{
> + return READ_ONCE(net->ipv4.sysctl_tcp_ecn) ||
> + dst_feature(dst, RTAX_FEATURE_ECN);
> +}
>
> /* From net/ipv6/syncookies.c */
> int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th);
> diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> index f4bcd4822fe0..5be12f186c26 100644
> --- a/net/ipv4/syncookies.c
> +++ b/net/ipv4/syncookies.c
> @@ -270,21 +270,6 @@ bool cookie_timestamp_decode(const struct net *net,
> }
> EXPORT_SYMBOL(cookie_timestamp_decode);
>
> -bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
> - const struct net *net, const struct dst_entry *dst)
> -{
> - bool ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN;
> -
> - if (!ecn_ok)
> - return false;
> -
> - if (READ_ONCE(net->ipv4.sysctl_tcp_ecn))
> - return true;
> -
> - return dst_feature(dst, RTAX_FEATURE_ECN);
> -}
> -EXPORT_SYMBOL(cookie_ecn_ok);
> -
> static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
> struct request_sock *req)
> {
> @@ -320,8 +305,12 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
> }
>
> struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
> - struct sock *sk, struct sk_buff *skb)
> + struct sock *sk, struct sk_buff *skb,
> + struct tcp_options_received *tcp_opt,
> + int mss, u32 tsoff)
> {
> + struct inet_request_sock *ireq;
> + struct tcp_request_sock *treq;
> struct request_sock *req;
>
> if (sk_is_mptcp(sk))
> @@ -337,40 +326,36 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
> return NULL;
> }
>
> + ireq = inet_rsk(req);
> + treq = tcp_rsk(req);
> +
> + req->mss = mss;
> + req->ts_recent = tcp_opt->saw_tstamp ? tcp_opt->rcv_tsval : 0;
> +
> + ireq->snd_wscale = tcp_opt->snd_wscale;
> + ireq->tstamp_ok = tcp_opt->saw_tstamp;
> + ireq->sack_ok = tcp_opt->sack_ok;
> + ireq->wscale_ok = tcp_opt->wscale_ok;
> + ireq->ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN;
> +
> + treq->ts_off = tsoff;
> +
> return req;
> }
> EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc);
>
> -/* On input, sk is a listener.
> - * Output is listener if incoming packet would not create a child
> - * NULL if memory could not be allocated.
> - */
> -struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
> +static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
> + struct sk_buff *skb)
> {
> - struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
> - const struct tcphdr *th = tcp_hdr(skb);
> struct tcp_options_received tcp_opt;
> - struct tcp_sock *tp = tcp_sk(sk);
> - struct inet_request_sock *ireq;
> - struct net *net = sock_net(sk);
> - struct tcp_request_sock *treq;
> - struct request_sock *req;
> - struct sock *ret = sk;
> - int full_space, mss;
> - struct flowi4 fl4;
> - struct rtable *rt;
> - __u8 rcv_wscale;
> u32 tsoff = 0;
> -
> - if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
> - !th->ack || th->rst)
> - goto out;
> + int mss;
>
> if (tcp_synq_no_recent_overflow(sk))
> goto out;
>
> - mss = __cookie_v4_check(ip_hdr(skb), th);
> - if (mss == 0) {
> + mss = __cookie_v4_check(ip_hdr(skb), tcp_hdr(skb));
> + if (!mss) {
> __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
> goto out;
> }
> @@ -391,21 +376,44 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
> if (!cookie_timestamp_decode(net, &tcp_opt))
> goto out;
>
> - req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb);
> + return cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb,
> + &tcp_opt, mss, tsoff);
> +out:
> + return ERR_PTR(-EINVAL);
> +}
> +
> +/* On input, sk is a listener.
> + * Output is listener if incoming packet would not create a child
> + * NULL if memory could not be allocated.
> + */
> +struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
> +{
> + struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
> + const struct tcphdr *th = tcp_hdr(skb);
> + struct tcp_sock *tp = tcp_sk(sk);
> + struct inet_request_sock *ireq;
> + struct net *net = sock_net(sk);
> + struct request_sock *req;
> + struct sock *ret = sk;
> + struct flowi4 fl4;
> + struct rtable *rt;
> + __u8 rcv_wscale;
> + int full_space;
> +
> + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
> + !th->ack || th->rst)
> + goto out;
> +
> + req = cookie_tcp_check(net, sk, skb);
> + if (IS_ERR(req))
> + goto out;
> if (!req)
> goto out_drop;
>
> ireq = inet_rsk(req);
> - treq = tcp_rsk(req);
> - treq->ts_off = tsoff;
> - req->mss = mss;
> +
> sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
> sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
> - ireq->snd_wscale = tcp_opt.snd_wscale;
> - ireq->sack_ok = tcp_opt.sack_ok;
> - ireq->wscale_ok = tcp_opt.wscale_ok;
> - ireq->tstamp_ok = tcp_opt.saw_tstamp;
> - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
>
> /* We throwed the options of the initial SYN away, so we hope
> * the ACK carries the same options again (see RFC1122 4.2.3.8)
> @@ -447,7 +455,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
> dst_metric(&rt->dst, RTAX_INITRWND));
>
> ireq->rcv_wscale = rcv_wscale;
> - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, &rt->dst);
> + ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst);
>
> ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
> /* ip_queue_xmit() depends on our flow being setup
> diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
> index e0a9220d1536..c8d2ca27220c 100644
> --- a/net/ipv6/syncookies.c
> +++ b/net/ipv6/syncookies.c
> @@ -127,31 +127,18 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th)
> }
> EXPORT_SYMBOL_GPL(__cookie_v6_check);
>
> -struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
> +static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
> + struct sk_buff *skb)
> {
> - const struct tcphdr *th = tcp_hdr(skb);
> - struct ipv6_pinfo *np = inet6_sk(sk);
> struct tcp_options_received tcp_opt;
> - struct tcp_sock *tp = tcp_sk(sk);
> - struct inet_request_sock *ireq;
> - struct net *net = sock_net(sk);
> - struct tcp_request_sock *treq;
> - struct request_sock *req;
> - struct dst_entry *dst;
> - struct sock *ret = sk;
> - int full_space, mss;
> - __u8 rcv_wscale;
> u32 tsoff = 0;
> -
> - if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
> - !th->ack || th->rst)
> - goto out;
> + int mss;
>
> if (tcp_synq_no_recent_overflow(sk))
> goto out;
>
> - mss = __cookie_v6_check(ipv6_hdr(skb), th);
> - if (mss == 0) {
> + mss = __cookie_v6_check(ipv6_hdr(skb), tcp_hdr(skb));
> + if (!mss) {
> __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
> goto out;
> }
> @@ -172,14 +159,37 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
> if (!cookie_timestamp_decode(net, &tcp_opt))
> goto out;
>
> - req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb);
> + return cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb,
> + &tcp_opt, mss, tsoff);
> +out:
> + return ERR_PTR(-EINVAL);
> +}
> +
> +struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
> +{
> + const struct tcphdr *th = tcp_hdr(skb);
> + struct ipv6_pinfo *np = inet6_sk(sk);
> + struct tcp_sock *tp = tcp_sk(sk);
> + struct inet_request_sock *ireq;
> + struct net *net = sock_net(sk);
> + struct request_sock *req;
> + struct dst_entry *dst;
> + struct sock *ret = sk;
> + __u8 rcv_wscale;
> + int full_space;
> +
> + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
> + !th->ack || th->rst)
> + goto out;
> +
> + req = cookie_tcp_check(net, sk, skb);
> + if (IS_ERR(req))
> + goto out;
> if (!req)
> goto out_drop;
>
> ireq = inet_rsk(req);
> - treq = tcp_rsk(req);
>
> - req->mss = mss;
> ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
> ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
>
> @@ -198,13 +208,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
> ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
> ireq->ir_iif = tcp_v6_iif(skb);
>
> - ireq->snd_wscale = tcp_opt.snd_wscale;
> - ireq->sack_ok = tcp_opt.sack_ok;
> - ireq->wscale_ok = tcp_opt.wscale_ok;
> - ireq->tstamp_ok = tcp_opt.saw_tstamp;
> - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
> - treq->ts_off = tsoff;
> -
> tcp_ao_syncookie(sk, skb, req, AF_INET6);
>
> /*
> @@ -245,7 +248,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
> dst_metric(dst, RTAX_INITRWND));
>
> ireq->rcv_wscale = rcv_wscale;
> - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, dst);
> + ireq->ecn_ok &= cookie_ecn_ok(net, dst);
Nice cleanup! IMHO looks very good. But deserves Eric's explicit ack, I
think ;)
The only question I have (out of sheer curiosity, no change requested
here) is:
have you considered leaving the 'ecn_ok' initialization unchanged
(it looks a little cleaner as a single-step init)? Is that for the sake
of later patches? (I haven't looked at them yet).
Cheers,
Paolo
Powered by blists - more mailing lists