Message-ID: <20260114212840.2511487-1-edumazet@google.com>
Date: Wed, 14 Jan 2026 21:28:40 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
Cc: Simon Horman <horms@...nel.org>, netdev@...r.kernel.org, eric.dumazet@...il.com,
Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH net-next] net: split kmalloc_reserve()
kmalloc_reserve() is too big to be inlined.
Put the slow path in a new out-of-line function: kmalloc_pfmemalloc().
Then let kmalloc_reserve() set skb->pfmemalloc only if
the slow path is taken.
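To illustrate the shape of the change, here is a minimal userspace
sketch (illustrative names, not the kernel API): keep the common case
small enough that the compiler inlines it, and push the rare retry
into a separate out-of-line function.

#include <stdio.h>
#include <stdlib.h>

/* Rare path, kept out of line so the fast path stays small enough
 * for the compiler to inline it into every caller.
 */
__attribute__((noinline))
static void *alloc_slowpath(size_t size, int *slow)
{
	*slow = 1;		/* flag is written only on this path */
	return malloc(size);	/* stand-in for the pfmemalloc retry */
}

/* Fast path: small, so gcc/clang inline it automatically. */
static inline void *alloc_fast(size_t size, int *slow)
{
	void *p = malloc(size);	/* first, cheap attempt */

	if (__builtin_expect(p != NULL, 1))
		return p;
	return alloc_slowpath(size, slow);
}

int main(void)
{
	int slow = 0;
	void *p = alloc_fast(128, &slow);

	printf("allocated %p, slow=%d\n", p, slow);
	free(p);
	return 0;
}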
This makes __alloc_skb() faster:
- kmalloc_reserve() is now automatically inlined by both gcc and clang.
- No more expensive RMW (skb->pfmemalloc = pfmemalloc); see the sketch after this list.
- No more expensive stack canary (for CONFIG_STACKPROTECTOR_STRONG=y).
- Removal of two prefetches that were coming too late for modern CPUs.
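On the RMW point above: skb->pfmemalloc is a one-bit field, so
writing it forces a load/mask/store of the word that holds the
neighbouring flags. Once skbuff_clear() has zeroed the struct, the
fast path needs no store at all. A minimal sketch (not kernel code):

struct hdr {
	unsigned int pfmemalloc:1;
	unsigned int cloned:1;
};

/* A one-bit store cannot be a plain move: the compiler loads the
 * containing word, masks, ors in the new bit, and stores it back.
 */
static void mark_pfmemalloc(struct hdr *h)
{
	h->pfmemalloc = 1;
}

int main(void)
{
	struct hdr h = { 0 };	/* cleared up front, like skbuff_clear() */

	mark_pfmemalloc(&h);	/* written only on the rare slow path */
	return h.pfmemalloc ? 0 : 1;
}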
The text size increase is quite small compared to the CPU savings (~0.5%):
$ size net/core/skbuff.clang.before.o net/core/skbuff.clang.after.o
text data bss dec hex filename
72507 5897 0 78404 13244 net/core/skbuff.clang.before.o
72681 5897 0 78578 132f2 net/core/skbuff.clang.after.o
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
net/core/skbuff.c | 41 +++++++++++++++++++++--------------------
1 file changed, 21 insertions(+), 20 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 77508cf7c41e829a11a988d8de3d2673ff1ff121..3dd21f5c1b6c8f49f4673fa2f8a1969583ab6983 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -583,6 +583,16 @@ struct sk_buff *napi_build_skb(void *data, unsigned int frag_size)
}
EXPORT_SYMBOL(napi_build_skb);
+static void *kmalloc_pfmemalloc(size_t obj_size, gfp_t flags, int node)
+{
+ if (!gfp_pfmemalloc_allowed(flags))
+ return NULL;
+ if (!obj_size)
+ return kmem_cache_alloc_node(net_hotdata.skb_small_head_cache,
+ flags, node);
+ return kmalloc_node_track_caller(obj_size, flags, node);
+}
+
/*
* kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
* the caller if emergency pfmemalloc reserves are being used. If it is and
@@ -591,9 +601,8 @@ EXPORT_SYMBOL(napi_build_skb);
* memory is free
*/
static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
- bool *pfmemalloc)
+ struct sk_buff *skb)
{
- bool ret_pfmemalloc = false;
size_t obj_size;
void *obj;
@@ -604,12 +613,12 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
node);
*size = SKB_SMALL_HEAD_CACHE_SIZE;
- if (obj || !(gfp_pfmemalloc_allowed(flags)))
+ if (likely(obj))
goto out;
/* Try again but now we are using pfmemalloc reserves */
- ret_pfmemalloc = true;
- obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags, node);
- goto out;
+ if (skb)
+ skb->pfmemalloc = true;
+ return kmalloc_pfmemalloc(0, flags, node);
}
obj_size = kmalloc_size_roundup(obj_size);
@@ -625,17 +634,14 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
obj = kmalloc_node_track_caller(obj_size,
flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
node);
- if (obj || !(gfp_pfmemalloc_allowed(flags)))
+ if (likely(obj))
goto out;
/* Try again but now we are using pfmemalloc reserves */
- ret_pfmemalloc = true;
- obj = kmalloc_node_track_caller(obj_size, flags, node);
-
+ if (skb)
+ skb->pfmemalloc = true;
+ obj = kmalloc_pfmemalloc(obj_size, flags, node);
out:
- if (pfmemalloc)
- *pfmemalloc = ret_pfmemalloc;
-
return obj;
}
@@ -667,7 +673,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
{
struct sk_buff *skb = NULL;
struct kmem_cache *cache;
- bool pfmemalloc;
u8 *data;
if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
@@ -697,25 +702,21 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
if (unlikely(!skb))
return NULL;
}
- prefetchw(skb);
+ skbuff_clear(skb);
/* We do our best to align skb_shared_info on a separate cache
* line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
* aligned memory blocks, unless SLUB/SLAB debug is enabled.
* Both skb->head and skb_shared_info are cache line aligned.
*/
- data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc);
+ data = kmalloc_reserve(&size, gfp_mask, node, skb);
if (unlikely(!data))
goto nodata;
/* kmalloc_size_roundup() might give us more room than requested.
* Put skb_shared_info exactly at the end of allocated zone,
* to allow max possible filling before reallocation.
*/
- prefetchw(data + SKB_WITH_OVERHEAD(size));
-
- skbuff_clear(skb);
__finalize_skb_around(skb, data, size);
- skb->pfmemalloc = pfmemalloc;
if (flags & SKB_ALLOC_FCLONE) {
struct sk_buff_fclones *fclones;
--
2.52.0.457.g6b5491de43-goog