netdev - Re: [PATCH net-next 4/4] net: add dedicated kmem_cache for typical/small skb->head

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACSApvYfnBBobJz5hXeyhKktGOqSOf4JNvQYOHXVdf6aysZ2Qw@mail.gmail.com>
Date:   Thu, 2 Feb 2023 15:14:10 -0500
From:   Soheil Hassas Yeganeh <soheil@...gle.com>
To:     Eric Dumazet <edumazet@...gle.com>
Cc:     "David S . Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>, netdev@...r.kernel.org,
        eric.dumazet@...il.com, Alexander Duyck <alexanderduyck@...com>
Subject: Re: [PATCH net-next 4/4] net: add dedicated kmem_cache for
 typical/small skb->head

On Thu, Feb 2, 2023 at 1:58 PM Eric Dumazet <edumazet@...gle.com> wrote:
>
> Recent removal of ksize() in alloc_skb() increased
> performance because we no longer read
> the associated struct page.
>
> We have an equivalent cost at kfree_skb() time.
>
> kfree(skb->head) has to access a struct page,
> often cold in CPU caches, to get the owning
> struct kmem_cache.
>
> Considering that many allocations are small,
> we can have our own kmem_cache to avoid the cache line miss.
>
> This also saves memory because these small heads
> are no longer padded to 1024 bytes.
>
> CONFIG_SLUB=y
> $ grep skbuff_small_head /proc/slabinfo
> skbuff_small_head   2907   2907    640   51    8 : tunables    0    0    0 : slabdata     57     57      0
>
> CONFIG_SLAB=y
> $ grep skbuff_small_head /proc/slabinfo
> skbuff_small_head    607    624    640    6    1 : tunables   54   27    8 : slabdata    104    104      5
>
> Note: after Kees Cook's patches and this one, we might
> be able to revert commit
> dbae2b062824 ("net: skb: introduce and use a single page frag cache")
> because GRO_MAX_HEAD is also small.
>
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>
> Cc: Paolo Abeni <pabeni@...hat.com>

Acked-by: Soheil Hassas Yeganeh <soheil@...gle.com>

Very nice!

> ---
>  net/core/skbuff.c | 52 ++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 47 insertions(+), 5 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index ae0b2aa1f01e8060cc4fe69137e9bd98e44280cc..3e540b4924701cc57b6fbd1b668bab3b652ee94c 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -89,6 +89,19 @@ static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
>  #ifdef CONFIG_SKB_EXTENSIONS
>  static struct kmem_cache *skbuff_ext_cache __ro_after_init;
>  #endif
> +static struct kmem_cache *skb_small_head_cache __ro_after_init;
> +
> +#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
> +
> +/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. */
> +#define SKB_SMALL_HEAD_CACHE_SIZE                                      \
> +       (is_power_of_2(SKB_SMALL_HEAD_SIZE) ?                   \
> +               (SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) :        \
> +               SKB_SMALL_HEAD_SIZE)
> +
> +#define SKB_SMALL_HEAD_HEADROOM                                                \
> +       SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
> +
>  int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
>  EXPORT_SYMBOL(sysctl_max_skb_frags);
>
> @@ -486,6 +499,21 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
>         void *obj;
>
>         obj_size = SKB_HEAD_ALIGN(*size);
> +       if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
> +           !(flags & KMALLOC_NOT_NORMAL_BITS)) {
> +
> +               /* skb_small_head_cache has non power of two size,
> +                * likely forcing SLUB to use order-3 pages.
> +                * We deliberately attempt a NOMEMALLOC allocation only.
> +                */
> +               obj = kmem_cache_alloc_node(skb_small_head_cache,
> +                               flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
> +                               node);
> +               if (obj) {
> +                       *size = SKB_SMALL_HEAD_CACHE_SIZE;
> +                       goto out;
> +               }
> +       }
>         *size = obj_size = kmalloc_size_roundup(obj_size);
>         /*
>          * Try a regular allocation, when that fails and we're not entitled
> @@ -805,6 +833,14 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
>         return page_pool_return_skb_page(virt_to_page(data));
>  }
>
> +static void skb_kfree_head(void *head, unsigned int end_offset)
> +{
> +       if (end_offset == SKB_SMALL_HEAD_HEADROOM)
> +               kmem_cache_free(skb_small_head_cache, head);
> +       else
> +               kfree(head);
> +}
> +
>  static void skb_free_head(struct sk_buff *skb)
>  {
>         unsigned char *head = skb->head;
> @@ -814,7 +850,7 @@ static void skb_free_head(struct sk_buff *skb)
>                         return;
>                 skb_free_frag(head);
>         } else {
> -               kfree(head);
> +               skb_kfree_head(head, skb_end_offset(skb));
>         }
>  }
>
> @@ -1995,7 +2031,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
>         return 0;
>
>  nofrags:
> -       kfree(data);
> +       skb_kfree_head(data, size);
>  nodata:
>         return -ENOMEM;
>  }
> @@ -4633,6 +4669,12 @@ void __init skb_init(void)
>                                                 0,
>                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
>                                                 NULL);
> +       skb_small_head_cache = kmem_cache_create("skbuff_small_head",
> +                                               SKB_SMALL_HEAD_CACHE_SIZE,
> +                                               0,
> +                                               SLAB_HWCACHE_ALIGN | SLAB_PANIC,
> +                                               NULL);
> +
>         skb_extensions_init();
>  }
>
> @@ -6297,7 +6339,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
>         if (skb_cloned(skb)) {
>                 /* drop the old head gracefully */
>                 if (skb_orphan_frags(skb, gfp_mask)) {
> -                       kfree(data);
> +                       skb_kfree_head(data, size);
>                         return -ENOMEM;
>                 }
>                 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
> @@ -6405,7 +6447,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
>         memcpy((struct skb_shared_info *)(data + size),
>                skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
>         if (skb_orphan_frags(skb, gfp_mask)) {
> -               kfree(data);
> +               skb_kfree_head(data, size);
>                 return -ENOMEM;
>         }
>         shinfo = (struct skb_shared_info *)(data + size);
> @@ -6441,7 +6483,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
>                 /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
>                 if (skb_has_frag_list(skb))
>                         kfree_skb_list(skb_shinfo(skb)->frag_list);
> -               kfree(data);
> +               skb_kfree_head(data, size);
>                 return -ENOMEM;
>         }
>         skb_release_data(skb, SKB_CONSUMED);
> --
> 2.39.1.456.gfc5497dd1b-goog
>