[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iJVnS_dGDtU7AVWgVrun-p68DZ0A3Pde47MHNeeQ2nwRA@mail.gmail.com>
Date: Thu, 31 Aug 2023 08:58:51 +0200
From: Eric Dumazet <edumazet@...gle.com>
To: Mohamed Khalfella <mkhalfella@...estorage.com>
Cc: willemdebruijn.kernel@...il.com, alexanderduyck@...com,
bpf@...r.kernel.org, brouer@...hat.com, davem@...emloft.net,
dhowells@...hat.com, keescook@...omium.org, kuba@...nel.org,
linux-kernel@...r.kernel.org, netdev@...r.kernel.org, pabeni@...hat.com,
willemb@...gle.com, stable@...r.kernel.org
Subject: Re: [PATCH v2] skbuff: skb_segment, Call zero copy functions before
using skbuff frags
On Thu, Aug 31, 2023 at 1:28 AM Mohamed Khalfella
<mkhalfella@...estorage.com> wrote:
>
> Commit bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions
> once per nskb") added the call to zero copy functions in skb_segment().
> The change introduced a bug in skb_segment() because skb_orphan_frags()
> may possibly change the number of fragments or allocate new fragments
> altogether leaving nrfrags and frag to point to the old values. This can
> cause a panic with stacktrace like the one below.
>
> [ 193.894380] BUG: kernel NULL pointer dereference, address: 00000000000000bc
> [ 193.895273] CPU: 13 PID: 18164 Comm: vh-net-17428 Kdump: loaded Tainted: G O 5.15.123+ #26
> [ 193.903919] RIP: 0010:skb_segment+0xb0e/0x12f0
> [ 194.021892] Call Trace:
> [ 194.027422] <TASK>
> [ 194.072861] tcp_gso_segment+0x107/0x540
> [ 194.082031] inet_gso_segment+0x15c/0x3d0
> [ 194.090783] skb_mac_gso_segment+0x9f/0x110
> [ 194.095016] __skb_gso_segment+0xc1/0x190
> [ 194.103131] netem_enqueue+0x290/0xb10 [sch_netem]
> [ 194.107071] dev_qdisc_enqueue+0x16/0x70
> [ 194.110884] __dev_queue_xmit+0x63b/0xb30
> [ 194.121670] bond_start_xmit+0x159/0x380 [bonding]
> [ 194.128506] dev_hard_start_xmit+0xc3/0x1e0
> [ 194.131787] __dev_queue_xmit+0x8a0/0xb30
> [ 194.138225] macvlan_start_xmit+0x4f/0x100 [macvlan]
> [ 194.141477] dev_hard_start_xmit+0xc3/0x1e0
> [ 194.144622] sch_direct_xmit+0xe3/0x280
> [ 194.147748] __dev_queue_xmit+0x54a/0xb30
> [ 194.154131] tap_get_user+0x2a8/0x9c0 [tap]
> [ 194.157358] tap_sendmsg+0x52/0x8e0 [tap]
> [ 194.167049] handle_tx_zerocopy+0x14e/0x4c0 [vhost_net]
> [ 194.173631] handle_tx+0xcd/0xe0 [vhost_net]
> [ 194.176959] vhost_worker+0x76/0xb0 [vhost]
> [ 194.183667] kthread+0x118/0x140
> [ 194.190358] ret_from_fork+0x1f/0x30
> [ 194.193670] </TASK>
>
> In this case calling skb_orphan_frags() updated nr_frags leaving nrfrags
> local variable in skb_segment() stale. This resulted in the code hitting
> i >= nrfrags prematurely and trying to move to next frag_skb using
> list_skb pointer, which was NULL, and caused kernel panic. Move the call
> to zero copy functions before using frags and nr_frags.
>
> Fixes: bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions once per nskb")
> Signed-off-by: Mohamed Khalfella <mkhalfella@...estorage.com>
> Reported-by: Amit Goyal <agoyal@...estorage.com>
> Cc: stable@...r.kernel.org
> ---
> net/core/skbuff.c | 34 ++++++++++++++++++++--------------
> 1 file changed, 20 insertions(+), 14 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index a298992060e6..18a33dc2d6af 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -4354,21 +4354,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> struct sk_buff *segs = NULL;
> struct sk_buff *tail = NULL;
> struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
> - skb_frag_t *frag = skb_shinfo(head_skb)->frags;
> unsigned int mss = skb_shinfo(head_skb)->gso_size;
> unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
> - struct sk_buff *frag_skb = head_skb;
> unsigned int offset = doffset;
> unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
> unsigned int partial_segs = 0;
> unsigned int headroom;
> unsigned int len = head_skb->len;
> + struct sk_buff *frag_skb;
> + skb_frag_t *frag;
> __be16 proto;
> bool csum, sg;
> - int nfrags = skb_shinfo(head_skb)->nr_frags;
> int err = -ENOMEM;
> int i = 0;
> - int pos;
> + int nfrags, pos;
>
> if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
> mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
> @@ -4445,6 +4444,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> headroom = skb_headroom(head_skb);
> pos = skb_headlen(head_skb);
>
> + if (skb_orphan_frags(head_skb, GFP_ATOMIC))
> + return ERR_PTR(-ENOMEM);
> +
> + nfrags = skb_shinfo(head_skb)->nr_frags;
> + frag = skb_shinfo(head_skb)->frags;
> + frag_skb = head_skb;
> +
> do {
> struct sk_buff *nskb;
> skb_frag_t *nskb_frag;
> @@ -4465,6 +4471,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> (skb_headlen(list_skb) == len || sg)) {
> BUG_ON(skb_headlen(list_skb) > len);
>
> + nskb = skb_clone(list_skb, GFP_ATOMIC);
> + if (unlikely(!nskb))
> + goto err;
> +
This patch is quite complex to review, so I am asking if this part was
really needed ?
<1> : You moved here <2> and <3>
If this is not strictly needed, please keep the code as is to ease
code review...
> i = 0;
> nfrags = skb_shinfo(list_skb)->nr_frags;
> frag = skb_shinfo(list_skb)->frags;
> @@ -4483,12 +4493,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> frag++;
> }
>
> - nskb = skb_clone(list_skb, GFP_ATOMIC);
<2>
> list_skb = list_skb->next;
>
> - if (unlikely(!nskb))
> - goto err;
> -
<3>
> if (unlikely(pskb_trim(nskb, len))) {
> kfree_skb(nskb);
> goto err;
> @@ -4564,12 +4570,16 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
> SKBFL_SHARED_FRAG;
>
> - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> - skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
> + if (skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC))
Why using list_skb here instead of frag_skb ?
Again, I have to look at the whole thing to understand why you did this.
> goto err;
>
> while (pos < offset + len) {
> if (i >= nfrags) {
> + if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
> + skb_zerocopy_clone(nskb, list_skb,
> + GFP_ATOMIC))
> + goto err;
> +
This part is fine.
> i = 0;
> nfrags = skb_shinfo(list_skb)->nr_frags;
> frag = skb_shinfo(list_skb)->frags;
> @@ -4583,10 +4593,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> i--;
> frag--;
> }
> - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> - skb_zerocopy_clone(nskb, frag_skb,
> - GFP_ATOMIC))
> - goto err;
>
> list_skb = list_skb->next;
> }
> --
> 2.17.1
>
Thanks.
Powered by blists - more mailing lists