Message-Id: <20250627094406.100919-1-yangfeng59949@163.com>
Date: Fri, 27 Jun 2025 17:44:06 +0800
From: Feng Yang <yangfeng59949@....com>
To: davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
horms@...nel.org,
willemb@...gle.com,
almasrymina@...gle.com,
kerneljasonxing@...il.com,
ebiggers@...gle.com,
asml.silence@...il.com,
aleksander.lobakin@...el.com,
stfomichev@...il.com
Cc: yangfeng@...inos.cn,
netdev@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH v2] skbuff: Improve the sending efficiency of __skb_send_sock
From: Feng Yang <yangfeng@...inos.cn>
Aggregate the skb data into a bio_vec array before transmission. When
sockmap forwards large packets, what previously required multiple sendmsg
calls now needs only a single call, which significantly improves
performance. For small packets, performance remains comparable to before.
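As an illustrative sketch only (not part of the patch; send_pages_batched
and its parameters are hypothetical), the core idea is to collect several
pages into one bio_vec array and hand them all to the socket in a single
sendmsg call:

	/* Hypothetical helper, for illustration only: submit nr pages
	 * (nr <= MAX_SKB_SEND_BIOVEC_SIZE) with one sendmsg call
	 * instead of one call per page.
	 */
	static int send_pages_batched(struct socket *sock, struct page **pages,
				      unsigned int *lens, int nr)
	{
		struct bio_vec bvec[MAX_SKB_SEND_BIOVEC_SIZE];
		struct msghdr msg = {
			.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT,
		};
		size_t total = 0;
		int i;

		for (i = 0; i < nr; i++) {
			bvec_set_page(&bvec[i], pages[i], lens[i], 0);
			total += lens[i];
		}

		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, nr, total);
		return sock_sendmsg(sock, &msg);
	}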
When forwarding with sockmap, the average latency over 10,000 packets is
as follows for each packet size:
size(B)  old(us)  new(us)
 512       56       55
1472       58       58
1600      106       79
3000      145      108
5000      182      123
Signed-off-by: Feng Yang <yangfeng@...inos.cn>
---
Changes in v2:
- Drop the dynamic memory allocation; thanks to Paolo Abeni and Stanislav Fomichev.
- Link to v1: https://lore.kernel.org/all/20250623084212.122284-1-yangfeng59949@163.com/
---
net/core/skbuff.c | 145 ++++++++++++++++++++++------------------------
1 file changed, 68 insertions(+), 77 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 85fc82f72d26..aae5139cfb28 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3231,104 +3231,95 @@ static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg)
 	return sock_sendmsg(sock, msg);
 }
 
+#define MAX_SKB_SEND_BIOVEC_SIZE 16
 typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg);
 
 static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
 			   int len, sendmsg_func sendmsg, int flags)
 {
-	unsigned int orig_len = len;
 	struct sk_buff *head = skb;
 	unsigned short fragidx;
-	int slen, ret;
-
-do_frag_list:
-
-	/* Deal with head data */
-	while (offset < skb_headlen(skb) && len) {
-		struct kvec kv;
-		struct msghdr msg;
-
-		slen = min_t(int, len, skb_headlen(skb) - offset);
-		kv.iov_base = skb->data + offset;
-		kv.iov_len = slen;
-		memset(&msg, 0, sizeof(msg));
-		msg.msg_flags = MSG_DONTWAIT | flags;
-
-		iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen);
-		ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
-				      sendmsg_unlocked, sk, &msg);
-		if (ret <= 0)
-			goto error;
-
-		offset += ret;
-		len -= ret;
-	}
-
-	/* All the data was skb head? */
-	if (!len)
-		goto out;
+	struct msghdr msg;
+	struct bio_vec bvec[MAX_SKB_SEND_BIOVEC_SIZE];
+	int ret, slen, total_len = 0;
+	int bvec_count = 0;
+	unsigned int copied = 0;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT | flags;
+
+	while (copied < len) {
+		/* Deal with head data */
+		if (offset < skb_headlen(skb) && bvec_count < MAX_SKB_SEND_BIOVEC_SIZE) {
+			struct page *page = virt_to_page(skb->data + offset);
+			unsigned int page_offset = offset_in_page(skb->data + offset);
+
+			if (!sendpage_ok(page))
+				msg.msg_flags &= ~MSG_SPLICE_PAGES;
+
+			slen = min_t(int, skb_headlen(skb) - offset, len - copied);
+			bvec_set_page(&bvec[bvec_count++], page, slen, page_offset);
+			copied += slen;
+			offset += slen;
+		}
 
-	/* Make offset relative to start of frags */
-	offset -= skb_headlen(skb);
+		/* Make offset relative to start of frags */
+		offset -= skb_headlen(skb);
 
-	/* Find where we are in frag list */
-	for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
-		skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
+		if (copied < len && bvec_count < MAX_SKB_SEND_BIOVEC_SIZE) {
+			for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+				skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
+				unsigned int frag_size = skb_frag_size(frag);
 
-		if (offset < skb_frag_size(frag))
-			break;
+				/* Find where we are in frag list */
+				if (offset >= frag_size) {
+					offset -= frag_size;
+					continue;
+				}
 
-		offset -= skb_frag_size(frag);
-	}
+				slen = min_t(size_t, frag_size - offset, len - copied);
+				bvec_set_page(&bvec[bvec_count++], skb_frag_page(frag), slen,
+					      skb_frag_off(frag) + offset);
 
-	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
-		skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
+				copied += slen;
+				offset = 0;
 
-		slen = min_t(size_t, len, skb_frag_size(frag) - offset);
+				if (copied >= len || bvec_count >= MAX_SKB_SEND_BIOVEC_SIZE)
+					break;
+			}
+		}
 
-		while (slen) {
-			struct bio_vec bvec;
-			struct msghdr msg = {
-				.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT |
-					     flags,
-			};
+		if (copied < len && bvec_count < MAX_SKB_SEND_BIOVEC_SIZE) {
+			/* Process any frag lists */
+			if (skb == head) {
+				if (skb_has_frag_list(skb))
+					skb = skb_shinfo(skb)->frag_list;
+			} else if (skb->next) {
+				skb = skb->next;
+			}
+		}
 
-			bvec_set_page(&bvec, skb_frag_page(frag), slen,
-				      skb_frag_off(frag) + offset);
-			iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1,
-				      slen);
+		if (bvec_count == MAX_SKB_SEND_BIOVEC_SIZE || copied == len) {
+			iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bvec_count, len);
+			ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, sendmsg_unlocked, sk, &msg);
 
-			ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
-					      sendmsg_unlocked, sk, &msg);
-			if (ret <= 0)
-				goto error;
+			if (ret < 0)
+				return ret;
 
+			/* Account for the bytes just sent */
 			len -= ret;
 			offset += ret;
-			slen -= ret;
+			total_len += ret;
+
+			/* Reset state for the next batch */
+			memset(&msg, 0, sizeof(msg));
+			msg.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT | flags;
+			copied = 0;
+			bvec_count = 0;
+			skb = head;
 		}
-
-		offset = 0;
 	}
 
-	if (len) {
-		/* Process any frag lists */
-
-		if (skb == head) {
-			if (skb_has_frag_list(skb)) {
-				skb = skb_shinfo(skb)->frag_list;
-				goto do_frag_list;
-			}
-		} else if (skb->next) {
-			skb = skb->next;
-			goto do_frag_list;
-		}
-	}
-
-out:
-	return orig_len - len;
-
-error:
-	return orig_len == len ? ret : orig_len - len;
+	return total_len;
 }
 
 /* Send skb data on a socket. Socket must be locked. */
--
2.43.0