lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20250623084212.122284-1-yangfeng59949@163.com>
Date: Mon, 23 Jun 2025 16:42:12 +0800
From: Feng Yang <yangfeng59949@....com>
To: davem@...emloft.net,
	edumazet@...gle.com,
	kuba@...nel.org,
	pabeni@...hat.com,
	horms@...nel.org,
	willemb@...gle.com,
	almasrymina@...gle.com,
	kerneljasonxing@...il.com,
	ebiggers@...gle.com,
	asml.silence@...il.com,
	aleksander.lobakin@...el.com
Cc: yangfeng@...inos.cn,
	netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH] skbuff: Improve the sending efficiency of __skb_send_sock

From: Feng Yang <yangfeng@...inos.cn>

Aggregate the skb data into a bio_vec array before transmission. When sockmap
forwards large packets, data that previously required multiple sendmsg calls
can now be sent in a single call, which significantly improves performance.
For small packets, performance remains comparable to the original level.

When forwarding through sockmap, the average latency for each packet size,
measured over 10,000 packets, is as follows:
size	old(us)		new(us)
512	56		55
1472	58		58
1600	106		79
3000	145		108
5000	182		123

Signed-off-by: Feng Yang <yangfeng@...inos.cn>
---
 net/core/skbuff.c | 112 +++++++++++++++++++++-------------------------
 1 file changed, 52 insertions(+), 60 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 85fc82f72d26..664443fc9baf 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3235,84 +3235,75 @@ typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg);
 static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
 			   int len, sendmsg_func sendmsg, int flags)
 {
-	unsigned int orig_len = len;
 	struct sk_buff *head = skb;
 	unsigned short fragidx;
-	int slen, ret;
+	struct msghdr msg;
+	struct bio_vec *bvec;
+	int max_vecs, ret, slen;
+	int bvec_count = 0;
+	unsigned int copied = 0;
 
-do_frag_list:
-
-	/* Deal with head data */
-	while (offset < skb_headlen(skb) && len) {
-		struct kvec kv;
-		struct msghdr msg;
-
-		slen = min_t(int, len, skb_headlen(skb) - offset);
-		kv.iov_base = skb->data + offset;
-		kv.iov_len = slen;
-		memset(&msg, 0, sizeof(msg));
-		msg.msg_flags = MSG_DONTWAIT | flags;
-
-		iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen);
-		ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
-				      sendmsg_unlocked, sk, &msg);
-		if (ret <= 0)
-			goto error;
+	max_vecs = skb_shinfo(skb)->nr_frags + 1; // +1 for linear data
+	if (skb_has_frag_list(skb)) {
+		struct sk_buff *frag_skb = skb_shinfo(skb)->frag_list;
 
-		offset += ret;
-		len -= ret;
+		while (frag_skb) {
+			max_vecs += skb_shinfo(frag_skb)->nr_frags + 1; // +1 for linear data
+			frag_skb = frag_skb->next;
+		}
 	}
 
-	/* All the data was skb head? */
-	if (!len)
-		goto out;
+	bvec = kcalloc(max_vecs, sizeof(struct bio_vec), GFP_KERNEL);
+	if (!bvec)
+		return -ENOMEM;
 
-	/* Make offset relative to start of frags */
-	offset -= skb_headlen(skb);
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT | flags;
+
+do_frag_list:
 
-	/* Find where we are in frag list */
-	for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
-		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+	/* Deal with head data */
+	if (offset < skb_headlen(skb)) {
+		slen = min_t(int, skb_headlen(skb) - offset, len - copied);
+		struct page *page = virt_to_page(skb->data + offset);
+		unsigned int page_offset = offset_in_page(skb->data + offset);
 
-		if (offset < skb_frag_size(frag))
-			break;
+		if (!sendpage_ok(page))
+			msg.msg_flags &= ~MSG_SPLICE_PAGES;
 
-		offset -= skb_frag_size(frag);
+		bvec_set_page(&bvec[bvec_count++], page, slen, page_offset);
+		copied += slen;
+		offset += slen;
 	}
 
-	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
-		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+	/* Make offset relative to start of frags */
+	offset -= skb_headlen(skb);
 
-		slen = min_t(size_t, len, skb_frag_size(frag) - offset);
+	if (copied < len) {
+		for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+			skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
+			unsigned int frag_size = skb_frag_size(frag);
 
-		while (slen) {
-			struct bio_vec bvec;
-			struct msghdr msg = {
-				.msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT |
-					     flags,
-			};
+			/* Find where we are in frag list */
+			if (offset >= frag_size) {
+				offset -= frag_size;
+				continue;
+			}
 
-			bvec_set_page(&bvec, skb_frag_page(frag), slen,
+			slen = min_t(size_t, frag_size - offset, len - copied);
+			bvec_set_page(&bvec[bvec_count++], skb_frag_page(frag), slen,
 				      skb_frag_off(frag) + offset);
-			iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1,
-				      slen);
 
-			ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
-					      sendmsg_unlocked, sk, &msg);
-			if (ret <= 0)
-				goto error;
+			copied += slen;
+			offset = 0;
 
-			len -= ret;
-			offset += ret;
-			slen -= ret;
+			if (copied >= len)
+				break;
 		}
-
-		offset = 0;
 	}
 
-	if (len) {
+	if (copied < len) {
 		/* Process any frag lists */
-
 		if (skb == head) {
 			if (skb_has_frag_list(skb)) {
 				skb = skb_shinfo(skb)->frag_list;
@@ -3324,11 +3315,12 @@ static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
 		}
 	}
 
-out:
-	return orig_len - len;
+	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bvec_count, len);
+	ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked, sendmsg_unlocked, sk, &msg);
+
+	kfree(bvec);
 
-error:
-	return orig_len == len ? ret : orig_len - len;
+	return ret;
 }
 
 /* Send skb data on a socket. Socket must be locked. */
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ