[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250222183057.800800-3-xiyou.wangcong@gmail.com>
Date: Sat, 22 Feb 2025 10:30:55 -0800
From: Cong Wang <xiyou.wangcong@...il.com>
To: netdev@...r.kernel.org
Cc: bpf@...r.kernel.org,
john.fastabend@...il.com,
jakub@...udflare.com,
zhoufeng.zf@...edance.com,
zijianzhang@...edance.com,
Cong Wang <cong.wang@...edance.com>
Subject: [Patch bpf-next 2/4] skmsg: implement slab allocator cache for sk_msg
From: Zijian Zhang <zijianzhang@...edance.com>
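The redirect ingress path allocates and frees sk_msg structures very
frequently. Introduce a dedicated kmem_cache for sk_msg to reduce the
allocation overhead on that path and improve performance.
To keep allocation and freeing paired on the same cache, alloc_sk_msg()
is renamed to sk_msg_alloc() and made non-static so that tcp_bpf.c can
use it instead of open-coded kzalloc(), and the kfree() calls on sk_msg
objects are converted to kfree_sk_msg().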
Reviewed-by: Cong Wang <cong.wang@...edance.com>
Signed-off-by: Zijian Zhang <zijianzhang@...edance.com>
---
include/linux/skmsg.h | 21 ++++++++++++---------
net/core/skmsg.c | 28 +++++++++++++++++++++-------
net/ipv4/tcp_bpf.c | 5 ++---
3 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index d6f0a8cd73c4..bf28ce9b5fdb 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -121,6 +121,7 @@ struct sk_psock {
struct rcu_work rwork;
};
+struct sk_msg *sk_msg_alloc(gfp_t gfp);
int sk_msg_expand(struct sock *sk, struct sk_msg *msg, int len,
int elem_first_coalesce);
int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
@@ -143,6 +144,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags);
bool sk_msg_is_readable(struct sock *sk);
+extern struct kmem_cache *sk_msg_cachep;
+
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
WARN_ON(i == msg->sg.end && bytes);
@@ -319,6 +322,13 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
}
+static inline void kfree_sk_msg(struct sk_msg *msg)
+{
+ if (msg->skb)
+ consume_skb(msg->skb);
+ kmem_cache_free(sk_msg_cachep, msg);
+}
+
static inline bool sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
@@ -330,7 +340,7 @@ static inline bool sk_psock_queue_msg(struct sk_psock *psock,
ret = true;
} else {
sk_msg_free(psock->sk, msg);
- kfree(msg);
+ kfree_sk_msg(msg);
ret = false;
}
spin_unlock_bh(&psock->ingress_lock);
@@ -378,13 +388,6 @@ static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
return psock ? list_empty(&psock->ingress_msg) : true;
}
-static inline void kfree_sk_msg(struct sk_msg *msg)
-{
- if (msg->skb)
- consume_skb(msg->skb);
- kfree(msg);
-}
-
static inline void sk_psock_report_error(struct sk_psock *psock, int err)
{
struct sock *sk = psock->sk;
@@ -441,7 +444,7 @@ static inline void sk_psock_cork_free(struct sk_psock *psock)
{
if (psock->cork) {
sk_msg_free(psock->sk, psock->cork);
- kfree(psock->cork);
+ kfree_sk_msg(psock->cork);
psock->cork = NULL;
}
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 4695cbd9c16f..25c53c8c9857 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -10,6 +10,8 @@
#include <net/tls.h>
#include <trace/events/sock.h>
+struct kmem_cache *sk_msg_cachep;
+
static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
{
if (msg->sg.end > msg->sg.start &&
@@ -503,16 +505,17 @@ bool sk_msg_is_readable(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_msg_is_readable);
-static struct sk_msg *alloc_sk_msg(gfp_t gfp)
+struct sk_msg *sk_msg_alloc(gfp_t gfp)
{
struct sk_msg *msg;
- msg = kzalloc(sizeof(*msg), gfp | __GFP_NOWARN);
+ msg = kmem_cache_zalloc(sk_msg_cachep, gfp | __GFP_NOWARN);
if (unlikely(!msg))
return NULL;
sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
return msg;
}
+EXPORT_SYMBOL_GPL(sk_msg_alloc);
static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
struct sk_buff *skb)
@@ -523,7 +526,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
if (!sk_rmem_schedule(sk, skb, skb->truesize))
return NULL;
- return alloc_sk_msg(GFP_KERNEL);
+ return sk_msg_alloc(GFP_KERNEL);
}
static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
@@ -592,7 +595,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
skb_set_owner_r(skb, sk);
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0)
- kfree(msg);
+ kfree_sk_msg(msg);
return err;
}
@@ -603,7 +606,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
{
- struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC);
+ struct sk_msg *msg = sk_msg_alloc(GFP_ATOMIC);
struct sock *sk = psock->sk;
int err;
@@ -612,7 +615,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
skb_set_owner_r(skb, sk);
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0)
- kfree(msg);
+ kfree_sk_msg(msg);
return err;
}
@@ -781,7 +784,7 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
if (!msg->skb)
atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
sk_msg_free(psock->sk, msg);
- kfree(msg);
+ kfree_sk_msg(msg);
}
}
@@ -1266,3 +1269,14 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
sk->sk_data_ready = psock->saved_data_ready;
psock->saved_data_ready = NULL;
}
+
+/* Create the sk_msg slab cache at boot. SLAB_PANIC stops the boot if the
+ * cache cannot be created, so sk_msg_alloc() never runs with a NULL cache.
+ */
+static int __init sk_msg_cachep_init(void)
+{
+	sk_msg_cachep = kmem_cache_create("sk_msg_cachep", sizeof(struct sk_msg), 0,
+					  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | SLAB_PANIC, NULL);
+	return 0;
+}
+late_initcall(sk_msg_cachep_init);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 85b64ffc20c6..f0ef41c951e2 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -38,7 +38,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
struct sk_msg *tmp;
int i, ret = 0;
- tmp = kzalloc(sizeof(*tmp), __GFP_NOWARN | GFP_KERNEL);
+ tmp = sk_msg_alloc(GFP_KERNEL);
if (unlikely(!tmp))
return -ENOMEM;
@@ -406,8 +406,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
msg->cork_bytes > msg->sg.size && !enospc) {
psock->cork_bytes = msg->cork_bytes - msg->sg.size;
if (!psock->cork) {
- psock->cork = kzalloc(sizeof(*psock->cork),
- GFP_ATOMIC | __GFP_NOWARN);
+ psock->cork = sk_msg_alloc(GFP_ATOMIC);
if (!psock->cork)
return -ENOMEM;
}
--
2.34.1
Powered by blists - more mailing lists