[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9f0c5e45-ad79-a9ea-dab1-aeb3bc3730ae@gmail.com>
Date: Mon, 30 Aug 2021 09:01:01 -0700
From: Eric Dumazet <eric.dumazet@...il.com>
To: Vasily Averin <vvs@...tuozzo.com>,
Christoph Paasch <christoph.paasch@...il.com>,
Eric Dumazet <eric.dumazet@...il.com>,
"David S. Miller" <davem@...emloft.net>
Cc: Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
David Ahern <dsahern@...nel.org>,
Jakub Kicinski <kuba@...nel.org>,
netdev <netdev@...r.kernel.org>, linux-kernel@...r.kernel.org,
kernel@...nvz.org, Julian Wiedmann <jwi@...ux.ibm.com>
Subject: Re: [PATCH v2] skb_expand_head() adjust skb->truesize incorrectly
On 8/29/21 5:59 AM, Vasily Averin wrote:
> Christoph Paasch reports [1] about incorrect skb->truesize
> after skb_expand_head() call in ip6_xmit.
> This may happen because of two reasons:
> - skb_set_owner_w() for newly cloned skb is called too early,
> before pskb_expand_head() where truesize is adjusted for (!skb-sk) case.
> - pskb_expand_head() does not adjust truesize in (skb->sk) case.
> In this case sk->sk_wmem_alloc should be adjusted too.
>
> [1] https://lkml.org/lkml/2021/8/20/1082
>
> Reported-by: Christoph Paasch <christoph.paasch@...il.com>
> Signed-off-by: Vasily Averin <vvs@...tuozzo.com>
> ---
> v2: based on patch version from Eric Dumazet,
> added __pskb_expand_head() function, which can be forced
> to adjust skb->truesize and sk->sk_wmem_alloc.
> ---
> net/core/skbuff.c | 43 +++++++++++++++++++++++++++++--------------
> 1 file changed, 29 insertions(+), 14 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index f931176..4691023 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1681,10 +1681,10 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
> * reloaded after call to this function.
> */
>
> -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> - gfp_t gfp_mask)
> +static int __pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> + gfp_t gfp_mask, bool update_truesize)
> {
> - int i, osize = skb_end_offset(skb);
> + int delta, i, osize = skb_end_offset(skb);
> int size = osize + nhead + ntail;
> long off;
> u8 *data;
> @@ -1756,9 +1756,13 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> * For the moment, we really care of rx path, or
> * when skb is orphaned (not attached to a socket).
> */
> - if (!skb->sk || skb->destructor == sock_edemux)
> - skb->truesize += size - osize;
> -
> + delta = size - osize;
> + if (!skb->sk || skb->destructor == sock_edemux) {
> + skb->truesize += delta;
> + } else if (update_truesize) {
Unfortunately we can not always do this sk_wmem_alloc change here.
Some skb have skb->sk set, but the 'reference on socket' is not through sk_wmem_alloc
It seems you need a helper to make sure skb->destructor is one of
the destructors that use skb->truesize and sk->sk_wmem_alloc
For instance, skb_orphan_partial() could have been used.
> + refcount_add(delta, &skb->sk->sk_wmem_alloc);
> + skb->truesize += delta;
> + }
> return 0;
>
> nofrags:
> @@ -1766,6 +1770,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> nodata:
> return -ENOMEM;
> }
> +
> +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> + gfp_t gfp_mask)
> +{
> + return __pskb_expand_head(skb, nhead, ntail, gfp_mask, false);
> +}
> EXPORT_SYMBOL(pskb_expand_head);
>
> /* Make private copy of skb with writable head and some headroom */
> @@ -1804,28 +1814,33 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
> struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
> {
> int delta = headroom - skb_headroom(skb);
> + struct sk_buff *oskb = NULL;
>
> if (WARN_ONCE(delta <= 0,
> "%s is expecting an increase in the headroom", __func__))
> return skb;
>
> + delta = SKB_DATA_ALIGN(delta);
> /* pskb_expand_head() might crash, if skb is shared */
> if (skb_shared(skb)) {
> struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
>
> - if (likely(nskb)) {
> - if (skb->sk)
> - skb_set_owner_w(nskb, skb->sk);
> - consume_skb(skb);
> - } else {
> + if (unlikely(!nskb)) {
> kfree_skb(skb);
> + return NULL;
> }
> + oskb = skb;
> skb = nskb;
> }
> - if (skb &&
> - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
> + if (__pskb_expand_head(skb, delta, 0, GFP_ATOMIC, true)) {
> kfree_skb(skb);
> - skb = NULL;
> + kfree_skb(oskb);
> + return NULL;
> + }
> + if (oskb) {
> + if (oskb->sk)
> + skb_set_owner_w(skb, oskb->sk);
> + consume_skb(oskb);
> }
> return skb;
> }
>
Powered by blists - more mailing lists