[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Thu, 2 Feb 2023 15:14:10 -0500
From: Soheil Hassas Yeganeh <soheil@...gle.com>
To: Eric Dumazet <edumazet@...gle.com>
Cc: "David S . Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, netdev@...r.kernel.org,
eric.dumazet@...il.com, Alexander Duyck <alexanderduyck@...com>
Subject: Re: [PATCH net-next 4/4] net: add dedicated kmem_cache for
typical/small skb->head
On Thu, Feb 2, 2023 at 1:58 PM Eric Dumazet <edumazet@...gle.com> wrote:
>
> Recent removal of ksize() in alloc_skb() increased
> performance because we no longer read
> the associated struct page.
>
> We have an equivalent cost at kfree_skb() time.
>
> kfree(skb->head) has to access a struct page,
> often cold in cpu caches, to get the owning
> struct kmem_cache.
>
> Considering that many allocations are small,
> we can have our own kmem_cache to avoid the cache line miss.
>
> This also saves memory because these small heads
> are no longer padded to 1024 bytes.
>
> CONFIG_SLUB=y
> $ grep skbuff_small_head /proc/slabinfo
> skbuff_small_head 2907 2907 640 51 8 : tunables 0 0 0 : slabdata 57 57 0
>
> CONFIG_SLAB=y
> $ grep skbuff_small_head /proc/slabinfo
> skbuff_small_head 607 624 640 6 1 : tunables 54 27 8 : slabdata 104 104 5
>
> Note: after Kees Cook's patches and this one, we might
> be able to revert commit
> dbae2b062824 ("net: skb: introduce and use a single page frag cache")
> because GRO_MAX_HEAD is also small.
>
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>
> Cc: Paolo Abeni <pabeni@...hat.com>
Acked-by: Soheil Hassas Yeganeh <soheil@...gle.com>
Very nice!
> ---
> net/core/skbuff.c | 52 ++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 47 insertions(+), 5 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index ae0b2aa1f01e8060cc4fe69137e9bd98e44280cc..3e540b4924701cc57b6fbd1b668bab3b652ee94c 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -89,6 +89,19 @@ static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
> #ifdef CONFIG_SKB_EXTENSIONS
> static struct kmem_cache *skbuff_ext_cache __ro_after_init;
> #endif
> +static struct kmem_cache *skb_small_head_cache __ro_after_init;
> +
> +#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
> +
> +/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. */
> +#define SKB_SMALL_HEAD_CACHE_SIZE \
> + (is_power_of_2(SKB_SMALL_HEAD_SIZE) ? \
> + (SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) : \
> + SKB_SMALL_HEAD_SIZE)
> +
> +#define SKB_SMALL_HEAD_HEADROOM \
> + SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
> +
> int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
> EXPORT_SYMBOL(sysctl_max_skb_frags);
>
> @@ -486,6 +499,21 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
> void *obj;
>
> obj_size = SKB_HEAD_ALIGN(*size);
> + if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
> + !(flags & KMALLOC_NOT_NORMAL_BITS)) {
> +
> + /* skb_small_head_cache has non power of two size,
> + * likely forcing SLUB to use order-3 pages.
> + * We deliberately attempt a NOMEMALLOC allocation only.
> + */
> + obj = kmem_cache_alloc_node(skb_small_head_cache,
> + flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
> + node);
> + if (obj) {
> + *size = SKB_SMALL_HEAD_CACHE_SIZE;
> + goto out;
> + }
> + }
> *size = obj_size = kmalloc_size_roundup(obj_size);
> /*
> * Try a regular allocation, when that fails and we're not entitled
> @@ -805,6 +833,14 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
> return page_pool_return_skb_page(virt_to_page(data));
> }
>
> +static void skb_kfree_head(void *head, unsigned int end_offset)
> +{
> + if (end_offset == SKB_SMALL_HEAD_HEADROOM)
> + kmem_cache_free(skb_small_head_cache, head);
> + else
> + kfree(head);
> +}
> +
> static void skb_free_head(struct sk_buff *skb)
> {
> unsigned char *head = skb->head;
> @@ -814,7 +850,7 @@ static void skb_free_head(struct sk_buff *skb)
> return;
> skb_free_frag(head);
> } else {
> - kfree(head);
> + skb_kfree_head(head, skb_end_offset(skb));
> }
> }
>
> @@ -1995,7 +2031,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
> return 0;
>
> nofrags:
> - kfree(data);
> + skb_kfree_head(data, size);
> nodata:
> return -ENOMEM;
> }
> @@ -4633,6 +4669,12 @@ void __init skb_init(void)
> 0,
> SLAB_HWCACHE_ALIGN|SLAB_PANIC,
> NULL);
> + skb_small_head_cache = kmem_cache_create("skbuff_small_head",
> + SKB_SMALL_HEAD_CACHE_SIZE,
> + 0,
> + SLAB_HWCACHE_ALIGN | SLAB_PANIC,
> + NULL);
> +
> skb_extensions_init();
> }
>
> @@ -6297,7 +6339,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
> if (skb_cloned(skb)) {
> /* drop the old head gracefully */
> if (skb_orphan_frags(skb, gfp_mask)) {
> - kfree(data);
> + skb_kfree_head(data, size);
> return -ENOMEM;
> }
> for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
> @@ -6405,7 +6447,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
> memcpy((struct skb_shared_info *)(data + size),
> skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
> if (skb_orphan_frags(skb, gfp_mask)) {
> - kfree(data);
> + skb_kfree_head(data, size);
> return -ENOMEM;
> }
> shinfo = (struct skb_shared_info *)(data + size);
> @@ -6441,7 +6483,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
> /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
> if (skb_has_frag_list(skb))
> kfree_skb_list(skb_shinfo(skb)->frag_list);
> - kfree(data);
> + skb_kfree_head(data, size);
> return -ENOMEM;
> }
> skb_release_data(skb, SKB_CONSUMED);
> --
> 2.39.1.456.gfc5497dd1b-goog
>
Powered by blists - more mailing lists