lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7facb17b78f6cbbdb38f140872e02a345a0023f7.1749466540.git.jgh@exim.org>
Date: Mon,  9 Jun 2025 17:05:17 +0100
From: Jeremy Harris <jgh@...m.org>
To: netdev@...r.kernel.org
Cc: linux-api@...r.kernel.org,
	edumazet@...gle.com,
	ncardwell@...gle.com,
	Jeremy Harris <jgh@...m.org>
Subject: [PATCH net-next v3 1/6]     tcp: support writing to a socket in listening state

    In the TCP sendmsg handler, permit a write in the LISTEN state if
    the MSG_PRELOAD flag is set.  Copy from the iovec to a linear sk_buff
    for placement on the socket write queue.

Signed-off-by: Jeremy Harris <jgh@...m.org>
---
 include/linux/socket.h                        |   1 +
 net/ipv4/tcp.c                                | 112 ++++++++++++++++++
 .../perf/trace/beauty/include/linux/socket.h  |   1 +
 tools/perf/trace/beauty/msg_flags.c           |   3 +
 4 files changed, 117 insertions(+)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 3b262487ec06..b41f4cd4dc97 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -330,6 +330,7 @@ struct ucred {
 #define MSG_SOCK_DEVMEM 0x2000000	/* Receive devmem skbs as cmsg */
 #define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
 #define MSG_SPLICE_PAGES 0x8000000	/* Splice the pages from the iterator in sendmsg() */
+#define MSG_PRELOAD	0x10000000	/* Preload tx data while listening */
 #define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
 #define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
 					   descriptor received through
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f64f8276a73c..c0a787c1649d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1057,6 +1057,115 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
 	return err;
 }
 
+/* Cut-down version of tcp_sendmsg_locked(), for writing on a listen socket.
+ * Returns bytes copied, or an error (-EINVAL if sk is not in TCP_LISTEN). */
+static int tcp_sendmsg_preload(struct sock *sk, struct msghdr *msg)
+{
+	struct sk_buff *skb;
+	int flags, err, copied = 0;
+	int size_goal;
+	int process_backlog = 0;
+	long timeo;
+
+	if (sk->sk_state != TCP_LISTEN)
+		return -EINVAL;
+
+	flags = msg->msg_flags;
+
+	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+
+	/* Ok commence sending. */
+restart:
+	/* Use an arbitrary "mss" value as the per-skb size target */
+	size_goal = 1000;
+
+	err = -EPIPE;
+	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+		goto do_error;
+
+	while (msg_data_left(msg)) {
+		ssize_t copy = 0;
+
+		skb = tcp_write_queue_tail(sk);
+		if (skb)
+			copy = size_goal - skb->len;
+
+		trace_tcp_sendmsg_locked(sk, msg, skb, size_goal);
+
+		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
+			bool first_skb = !skb;
+
+			/* Limit to only one skb on the sk write queue */
+
+			if (!first_skb)
+				goto out_nopush;
+
+			if (!sk_stream_memory_free(sk))
+				goto wait_for_space;
+
+			if (unlikely(process_backlog >= 16)) {
+				process_backlog = 0;
+				if (sk_flush_backlog(sk))
+					goto restart;
+			}
+
+			skb = tcp_stream_alloc_skb(sk, sk->sk_allocation,
+						   first_skb);
+			if (!skb)
+				goto wait_for_space;
+
+			process_backlog++;
+
+#ifdef CONFIG_SKB_DECRYPTED
+			skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+#endif
+			tcp_skb_entail(sk, skb);
+			copy = size_goal;
+		}
+
+		/* Try to append data to the end of skb. */
+		if (copy > msg_data_left(msg))
+			copy = msg_data_left(msg);
+
+		copy = min_t(int, copy, skb_tailroom(skb));
+		err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
+		if (err)
+			goto do_error;
+
+		TCP_SKB_CB(skb)->end_seq += copy;
+		tcp_skb_pcount_set(skb, 0);
+
+		copied += copy;
+		goto out_nopush;	/* one copy per call; never loops for more */
+
+wait_for_space:
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+		tcp_remove_empty_skb(sk);
+
+		err = sk_stream_wait_memory(sk, &timeo);
+		if (err != 0)
+			goto do_error;
+	}
+
+out_nopush:
+	return copied;
+
+do_error:
+	tcp_remove_empty_skb(sk);
+
+	if (copied)
+		goto out_nopush;
+
+	err = sk_stream_error(sk, flags, err);
+	/* make sure we wake any epoll edge trigger waiter */
+	if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
+		sk->sk_write_space(sk);
+		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+	}
+
+	return err;
+}
+
 int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct net_devmem_dmabuf_binding *binding = NULL;
@@ -1129,6 +1238,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			goto out_err;
 	}
 
+	if (unlikely(flags & MSG_PRELOAD))
+		return tcp_sendmsg_preload(sk, msg);
+
 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
 	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index c3322eb3d686..e9ea498169f3 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -330,6 +330,7 @@ struct ucred {
 #define MSG_SOCK_DEVMEM 0x2000000	/* Receive devmem skbs as cmsg */
 #define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
 #define MSG_SPLICE_PAGES 0x8000000	/* Splice the pages from the iterator in sendmsg() */
+#define MSG_PRELOAD	0x10000000	/* Preload tx data while listening */
 #define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
 #define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
 					   descriptor received through
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
index 2da581ff0c80..27e40da9b02d 100644
--- a/tools/perf/trace/beauty/msg_flags.c
+++ b/tools/perf/trace/beauty/msg_flags.c
@@ -20,6 +20,9 @@
 #ifndef MSG_SPLICE_PAGES
 #define MSG_SPLICE_PAGES	0x8000000
 #endif
+#ifndef MSG_PRELOAD
+#define MSG_PRELOAD		0x10000000
+#endif
 #ifndef MSG_FASTOPEN
 #define MSG_FASTOPEN		0x20000000
 #endif
-- 
2.49.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ