[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20240315190212.23517-1-kuniyu@amazon.com>
Date: Fri, 15 Mar 2024 12:02:12 -0700
From: Kuniyuki Iwashima <kuniyu@...zon.com>
To: <edumazet@...gle.com>
CC: <andrii@...nel.org>, <ast@...nel.org>, <bpf@...r.kernel.org>,
<daniel@...earbox.net>, <kuni1840@...il.com>, <kuniyu@...zon.com>,
<martin.lau@...ux.dev>, <netdev@...r.kernel.org>, <pabeni@...hat.com>
Subject: Re: [PATCH v8 bpf-next 4/6] bpf: tcp: Handle BPF SYN Cookie in cookie_v[46]_check().
From: Eric Dumazet <edumazet@...gle.com>
Date: Fri, 15 Mar 2024 14:37:57 +0100
> On Mon, Jan 15, 2024 at 9:57 PM Kuniyuki Iwashima <kuniyu@...zon.com> wrote:
> >
> > We will support arbitrary SYN Cookie with BPF in the following
> > patch.
> >
> > If BPF prog validates ACK and kfunc allocates a reqsk, it will
> > be carried to cookie_[46]_check() as skb->sk. If skb->sk is not
> > NULL, we call cookie_bpf_check().
> >
> > Then, we clear skb->sk and skb->destructor, which are needed not
> > to hold refcnt for reqsk and the listener. See the following patch
> > for details.
> >
> > After that, we finish initialisation for the remaining fields with
> > cookie_tcp_reqsk_init().
> >
> > Note that the server side WScale is set only for non-BPF SYN Cookie.
>
> So the difference between BPF and non-BPF is using a req->syncookie
> which had a prior meaning ?
Yes, it was used only in tcp_conn_request(), so I reused the field
and added another meaning for syncookie ACK path.
>
> This is very confusing, and needs documentation/comments.
Will add comment in request_sock.h and syncookies.c
>
>
> >
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@...zon.com>
> > ---
> > include/net/tcp.h | 20 ++++++++++++++++++++
> > net/ipv4/syncookies.c | 31 +++++++++++++++++++++++++++----
> > net/ipv6/syncookies.c | 13 +++++++++----
> > 3 files changed, 56 insertions(+), 8 deletions(-)
> >
> > diff --git a/include/net/tcp.h b/include/net/tcp.h
> > index 114000e71a46..dfe99a084a71 100644
> > --- a/include/net/tcp.h
> > +++ b/include/net/tcp.h
> > @@ -599,6 +599,26 @@ static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry *
> > dst_feature(dst, RTAX_FEATURE_ECN);
> > }
> >
> > +#if IS_ENABLED(CONFIG_BPF)
> > +static inline bool cookie_bpf_ok(struct sk_buff *skb)
> > +{
> > + return skb->sk;
> > +}
> > +
> > +struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb);
> > +#else
> > +static inline bool cookie_bpf_ok(struct sk_buff *skb)
> > +{
> > + return false;
> > +}
> > +
> > +static inline struct request_sock *cookie_bpf_check(struct net *net, struct sock *sk,
> > + struct sk_buff *skb)
> > +{
> > + return NULL;
> > +}
> > +#endif
> > +
> > /* From net/ipv6/syncookies.c */
> > int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th);
> > struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
> > diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> > index 981944c22820..be88bf586ff9 100644
> > --- a/net/ipv4/syncookies.c
> > +++ b/net/ipv4/syncookies.c
> > @@ -295,6 +295,24 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
> > return 0;
> > }
> >
> > +#if IS_ENABLED(CONFIG_BPF)
> > +struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb)
> > +{
> > + struct request_sock *req = inet_reqsk(skb->sk);
> > +
> > + skb->sk = NULL;
> > + skb->destructor = NULL;
> > +
> > + if (cookie_tcp_reqsk_init(sk, skb, req)) {
> > + reqsk_free(req);
> > + req = NULL;
> > + }
> > +
> > + return req;
> > +}
> > +EXPORT_SYMBOL_GPL(cookie_bpf_check);
> > +#endif
> > +
> > struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
> > struct sock *sk, struct sk_buff *skb,
> > struct tcp_options_received *tcp_opt,
> > @@ -395,9 +413,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
> > !th->ack || th->rst)
> > goto out;
> >
> > - req = cookie_tcp_check(net, sk, skb);
> > - if (IS_ERR(req))
> > - goto out;
> > + if (cookie_bpf_ok(skb)) {
> > + req = cookie_bpf_check(sk, skb);
> > + } else {
> > + req = cookie_tcp_check(net, sk, skb);
> > + if (IS_ERR(req))
> > + goto out;
> > + }
> > if (!req)
> > goto out_drop;
> >
> > @@ -445,7 +467,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
> > ireq->wscale_ok, &rcv_wscale,
> > dst_metric(&rt->dst, RTAX_INITRWND));
> >
> > - ireq->rcv_wscale = rcv_wscale;
> > + if (!req->syncookie)
> > + ireq->rcv_wscale = rcv_wscale;
> > ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst);
> >
> > ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
> > diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
> > index c8d2ca27220c..6b9c69278819 100644
> > --- a/net/ipv6/syncookies.c
> > +++ b/net/ipv6/syncookies.c
> > @@ -182,9 +182,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
> > !th->ack || th->rst)
> > goto out;
> >
> > - req = cookie_tcp_check(net, sk, skb);
> > - if (IS_ERR(req))
> > - goto out;
> > + if (cookie_bpf_ok(skb)) {
> > + req = cookie_bpf_check(sk, skb);
> > + } else {
> > + req = cookie_tcp_check(net, sk, skb);
> > + if (IS_ERR(req))
> > + goto out;
> > + }
> > if (!req)
> > goto out_drop;
> >
> > @@ -247,7 +251,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
> > ireq->wscale_ok, &rcv_wscale,
> > dst_metric(dst, RTAX_INITRWND));
> >
> > - ireq->rcv_wscale = rcv_wscale;
> > + if (!req->syncookie)
> > + ireq->rcv_wscale = rcv_wscale;
>
> I think a comment is deserved. I do not understand this.
>
> cookie_v6_check() is dealing with syncookie, unless I am mistaken.
Exactly, in both cases, here we handle syncookie, but req->syncookie
can be true only for the BPF case.
> Also syzbot is not happy, req->syncookie might be uninitialized here.
I'll make sure we init the field during allocation.
Thank you!
>
> BUG: KMSAN: uninit-value in cookie_v4_check+0x22b7/0x29e0
> net/ipv4/syncookies.c:477
> cookie_v4_check+0x22b7/0x29e0 net/ipv4/syncookies.c:477
> tcp_v4_cookie_check net/ipv4/tcp_ipv4.c:1855 [inline]
> tcp_v4_do_rcv+0xb17/0x10b0 net/ipv4/tcp_ipv4.c:1914
> tcp_v4_rcv+0x4ce4/0x5420 net/ipv4/tcp_ipv4.c:2322
> ip_protocol_deliver_rcu+0x2a3/0x13d0 net/ipv4/ip_input.c:205
> ip_local_deliver_finish+0x332/0x500 net/ipv4/ip_input.c:233
> NF_HOOK include/linux/netfilter.h:314 [inline]
> ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254
> dst_input include/net/dst.h:460 [inline]
> ip_rcv_finish+0x4a2/0x520 net/ipv4/ip_input.c:449
> NF_HOOK include/linux/netfilter.h:314 [inline]
> ip_rcv+0xcd/0x380 net/ipv4/ip_input.c:569
> __netif_receive_skb_one_core net/core/dev.c:5538 [inline]
> __netif_receive_skb+0x319/0x9e0 net/core/dev.c:5652
> process_backlog+0x480/0x8b0 net/core/dev.c:5981
> __napi_poll+0xe7/0x980 net/core/dev.c:6632
> napi_poll net/core/dev.c:6701 [inline]
> net_rx_action+0x89d/0x1820 net/core/dev.c:6813
> __do_softirq+0x1c0/0x7d7 kernel/softirq.c:554
> do_softirq+0x9a/0x100 kernel/softirq.c:455
> __local_bh_enable_ip+0x9f/0xb0 kernel/softirq.c:382
> local_bh_enable include/linux/bottom_half.h:33 [inline]
> rcu_read_unlock_bh include/linux/rcupdate.h:820 [inline]
> __dev_queue_xmit+0x2776/0x52c0 net/core/dev.c:4362
> dev_queue_xmit include/linux/netdevice.h:3091 [inline]
> neigh_hh_output include/net/neighbour.h:526 [inline]
> neigh_output include/net/neighbour.h:540 [inline]
> ip_finish_output2+0x187a/0x1b70 net/ipv4/ip_output.c:235
> __ip_finish_output+0x287/0x810
> ip_finish_output+0x4b/0x550 net/ipv4/ip_output.c:323
> NF_HOOK_COND include/linux/netfilter.h:303 [inline]
> ip_output+0x15f/0x3f0 net/ipv4/ip_output.c:433
> dst_output include/net/dst.h:450 [inline]
> ip_local_out net/ipv4/ip_output.c:129 [inline]
> __ip_queue_xmit+0x1e93/0x2030 net/ipv4/ip_output.c:535
> ip_queue_xmit+0x60/0x80 net/ipv4/ip_output.c:549
> __tcp_transmit_skb+0x3c70/0x4890 net/ipv4/tcp_output.c:1462
> tcp_transmit_skb net/ipv4/tcp_output.c:1480 [inline]
> tcp_write_xmit+0x3ee1/0x8900 net/ipv4/tcp_output.c:2792
> __tcp_push_pending_frames net/ipv4/tcp_output.c:2977 [inline]
> tcp_send_fin+0xa90/0x12e0 net/ipv4/tcp_output.c:3578
> tcp_shutdown+0x198/0x1f0 net/ipv4/tcp.c:2716
> inet_shutdown+0x33f/0x5b0 net/ipv4/af_inet.c:923
> __sys_shutdown_sock net/socket.c:2425 [inline]
> __sys_shutdown net/socket.c:2437 [inline]
> __do_sys_shutdown net/socket.c:2445 [inline]
> __se_sys_shutdown+0x2a4/0x440 net/socket.c:2443
> __x64_sys_shutdown+0x6c/0xa0 net/socket.c:2443
> do_syscall_64+0xd5/0x1f0
> entry_SYSCALL_64_after_hwframe+0x6d/0x75
>
> Uninit was stored to memory at:
> reqsk_alloc include/net/request_sock.h:148 [inline]
> inet_reqsk_alloc+0x651/0x7a0 net/ipv4/tcp_input.c:6978
> cookie_tcp_reqsk_alloc+0xd4/0x900 net/ipv4/syncookies.c:328
> cookie_tcp_check net/ipv4/syncookies.c:388 [inline]
> cookie_v4_check+0x289f/0x29e0 net/ipv4/syncookies.c:420
> tcp_v4_cookie_check net/ipv4/tcp_ipv4.c:1855 [inline]
> tcp_v4_do_rcv+0xb17/0x10b0 net/ipv4/tcp_ipv4.c:1914
> tcp_v4_rcv+0x4ce4/0x5420 net/ipv4/tcp_ipv4.c:2322
> ip_protocol_deliver_rcu+0x2a3/0x13d0 net/ipv4/ip_input.c:205
> ip_local_deliver_finish+0x332/0x500 net/ipv4/ip_input.c:233
> NF_HOOK include/linux/netfilter.h:314 [inline]
> ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254
> dst_input include/net/dst.h:460 [inline]
> ip_rcv_finish+0x4a2/0x520 net/ipv4/ip_input.c:449
> NF_HOOK include/linux/netfilter.h:314 [inline]
> ip_rcv+0xcd/0x380 net/ipv4/ip_input.c:569
> __netif_receive_skb_one_core net/core/dev.c:5538 [inline]
> __netif_receive_skb+0x319/0x9e0 net/core/dev.c:5652
> process_backlog+0x480/0x8b0 net/core/dev.c:5981
> __napi_poll+0xe7/0x980 net/core/dev.c:6632
> napi_poll net/core/dev.c:6701 [inline]
> net_rx_action+0x89d/0x1820 net/core/dev.c:6813
> __do_softirq+0x1c0/0x7d7 kernel/softirq.c:554
>
> Uninit was created at:
> __alloc_pages+0x9a7/0xe00 mm/page_alloc.c:4592
> __alloc_pages_node include/linux/gfp.h:238 [inline]
> alloc_pages_node include/linux/gfp.h:261 [inline]
> alloc_slab_page mm/slub.c:2175 [inline]
> allocate_slab mm/slub.c:2338 [inline]
> new_slab+0x2de/0x1400 mm/slub.c:2391
> ___slab_alloc+0x1184/0x33d0 mm/slub.c:3525
> __slab_alloc mm/slub.c:3610 [inline]
> __slab_alloc_node mm/slub.c:3663 [inline]
> slab_alloc_node mm/slub.c:3835 [inline]
> kmem_cache_alloc+0x6d3/0xbe0 mm/slub.c:3852
> reqsk_alloc include/net/request_sock.h:131 [inline]
> inet_reqsk_alloc+0x66/0x7a0 net/ipv4/tcp_input.c:6978
> tcp_conn_request+0x484/0x44e0 net/ipv4/tcp_input.c:7135
> tcp_v4_conn_request+0x16f/0x1d0 net/ipv4/tcp_ipv4.c:1716
> tcp_rcv_state_process+0x2e5/0x4bb0 net/ipv4/tcp_input.c:6655
> tcp_v4_do_rcv+0xbfd/0x10b0 net/ipv4/tcp_ipv4.c:1929
> tcp_v4_rcv+0x4ce4/0x5420 net/ipv4/tcp_ipv4.c:2322
> ip_protocol_deliver_rcu+0x2a3/0x13d0 net/ipv4/ip_input.c:205
> ip_local_deliver_finish+0x332/0x500 net/ipv4/ip_input.c:233
> NF_HOOK include/linux/netfilter.h:314 [inline]
> ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254
> dst_input include/net/dst.h:460 [inline]
> ip_sublist_rcv_finish net/ipv4/ip_input.c:580 [inline]
> ip_list_rcv_finish net/ipv4/ip_input.c:631 [inline]
> ip_sublist_rcv+0x15f3/0x17f0 net/ipv4/ip_input.c:639
> ip_list_rcv+0x9ef/0xa40 net/ipv4/ip_input.c:674
> __netif_receive_skb_list_ptype net/core/dev.c:5581 [inline]
> __netif_receive_skb_list_core+0x15c5/0x1670 net/core/dev.c:5629
> __netif_receive_skb_list net/core/dev.c:5681 [inline]
> netif_receive_skb_list_internal+0x106c/0x16f0 net/core/dev.c:5773
> gro_normal_list include/net/gro.h:438 [inline]
> napi_complete_done+0x425/0x880 net/core/dev.c:6113
> virtqueue_napi_complete drivers/net/virtio_net.c:465 [inline]
> virtnet_poll+0x149d/0x2240 drivers/net/virtio_net.c:2211
> __napi_poll+0xe7/0x980 net/core/dev.c:6632
> napi_poll net/core/dev.c:6701 [inline]
> net_rx_action+0x89d/0x1820 net/core/dev.c:6813
> __do_softirq+0x1c0/0x7d7 kernel/softirq.c:554
>
> CPU: 0 PID: 16792 Comm: syz-executor.2 Not tainted
> 6.8.0-syzkaller-05562-g61387b8dcf1d #0
> Hardware name: Google Google Compute Engine/Google Compute Engine,
> BIOS Google 02/29/2024
Powered by blists - more mailing lists