lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue,  9 Jun 2020 10:09:29 -0400
From:   Willem de Bruijn <willemdebruijn.kernel@...il.com>
To:     netdev@...r.kernel.org
Cc:     Willem de Bruijn <willemb@...gle.com>
Subject: [PATCH RFC net-next 1/6] net: multiple release time SO_TXTIME

From: Willem de Bruijn <willemb@...gle.com>

Pace transmission of segments in a UDP GSO datagram.

Batching datagram protocol stack traversals with UDP_SEGMENT saves
significant cycles for large data transfers.

But all segments of a GSO packet are sent at once, whereas pacing
traffic to internet clients often requires sending just a few MSS per
msec pacing interval.

SO_TXTIME allows delivery of packets at a later time. Extend it
to allow pacing the segments in a UDP GSO packet, to be able to build
larger GSO datagrams.

Add SO_TXTIME flag SOF_TXTIME_MULTI_RELEASE. This reinterprets the
lower 8 bits of the 64-bit release timestamp as

  - bits 4..7: release time interval in msec
  - bits 0..3: number of segments (MSS) sent per interval

So a timestamp of 0x148 means

  - 0x100 initial timestamp in Qdisc selected clocksource
  - every 4 msec release N MSS (bits 4..7 = 0x4)
  - N is 8 (bits 0..3 = 0x8)

A subsequent patch changes the qdisc to pace the individual segments.

Packet transmission can race with the socket option. This is safe.
For predictable behavior, it is up to the caller to not toggle the
feature while packets on a socket are in flight.

Signed-off-by: Willem de Bruijn <willemb@...gle.com>
---
 include/linux/netdevice.h       |  1 +
 include/net/sock.h              |  3 ++-
 include/uapi/linux/net_tstamp.h |  3 ++-
 net/core/dev.c                  | 44 +++++++++++++++++++++++++++++++++
 net/core/sock.c                 |  4 +++
 5 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1a96e9c4ec36..15ea976dd446 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4528,6 +4528,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 				  netdev_features_t features, bool tx_path);
 struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 				    netdev_features_t features);
+struct sk_buff *skb_gso_segment_txtime(struct sk_buff *skb);
 
 struct netdev_bonding_info {
 	ifslave	slave;
diff --git a/include/net/sock.h b/include/net/sock.h
index c53cc42b5ab9..491e389b3570 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -493,7 +493,8 @@ struct sock {
 	u8			sk_clockid;
 	u8			sk_txtime_deadline_mode : 1,
 				sk_txtime_report_errors : 1,
-				sk_txtime_unused : 6;
+				sk_txtime_multi_release : 1,
+				sk_txtime_unused : 5;
 
 	struct socket		*sk_socket;
 	void			*sk_user_data;
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 7ed0b3d1c00a..ca1ae3b6f601 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -162,8 +162,9 @@ struct scm_ts_pktinfo {
 enum txtime_flags {
 	SOF_TXTIME_DEADLINE_MODE = (1 << 0),
 	SOF_TXTIME_REPORT_ERRORS = (1 << 1),
+	SOF_TXTIME_MULTI_RELEASE = (1 << 2),
 
-	SOF_TXTIME_FLAGS_LAST = SOF_TXTIME_REPORT_ERRORS,
+	SOF_TXTIME_FLAGS_LAST = SOF_TXTIME_MULTI_RELEASE,
 	SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |
 				 SOF_TXTIME_FLAGS_LAST
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index 061496a1f640..5058083375fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3377,6 +3377,50 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(__skb_gso_segment);
 
+struct sk_buff *skb_gso_segment_txtime(struct sk_buff *skb)
+{
+	int mss_per_ival, mss_in_cur_ival;
+	struct sk_buff *segs, *seg;
+	struct skb_shared_info *sh;
+	u64 step_ns, tstamp;
+
+	if (!skb->sk || !sk_fullsock(skb->sk) ||
+	    !skb->sk->sk_txtime_multi_release)
+		return NULL;	/* no full socket with multi-release set */
+
+	/* decode tstamp low byte: bits 0..3 = segs per interval, 4..7 = step */
+	mss_per_ival = skb->tstamp & 0xF;
+	step_ns = ((skb->tstamp >> 4) & 0xF) * NSEC_PER_MSEC;	/* 0..15 msec */
+	tstamp = skb->tstamp;	/* still carries the 8 control bits */
+
+	if (mss_per_ival == 0)
+		return NULL;	/* no per-interval segment count encoded */
+
+	/* no split needed: every segment fits in the first interval */
+	sh = skb_shinfo(skb);
+	if (sh->gso_segs <= mss_per_ival)
+		return NULL;
+
+	segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+	if (IS_ERR_OR_NULL(segs))
+		return segs;	/* NULL or error pointer is passed through */
+
+	mss_in_cur_ival = 0;
+
+	for (seg = segs; seg; seg = seg->next) {
+		seg->tstamp = tstamp & ~0xFF;	/* clear the 8 control bits */
+
+		mss_in_cur_ival++;
+		if (mss_in_cur_ival == mss_per_ival) {
+			tstamp += step_ns;	/* interval full: next release time */
+			mss_in_cur_ival = 0;
+		}
+	}
+
+	return segs;
+}
+EXPORT_SYMBOL_GPL(skb_gso_segment_txtime);
+
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
 void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c4acf1f0220..7036b8855154 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1258,6 +1258,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
 		sk->sk_txtime_report_errors =
 			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+		sk->sk_txtime_multi_release =
+			!!(sk_txtime.flags & SOF_TXTIME_MULTI_RELEASE);
 		break;
 
 	case SO_BINDTOIFINDEX:
@@ -1608,6 +1610,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 				  SOF_TXTIME_DEADLINE_MODE : 0;
 		v.txtime.flags |= sk->sk_txtime_report_errors ?
 				  SOF_TXTIME_REPORT_ERRORS : 0;
+		v.txtime.flags |= sk->sk_txtime_multi_release ?
+				  SOF_TXTIME_MULTI_RELEASE : 0;
 		break;
 
 	case SO_BINDTOIFINDEX:
-- 
2.27.0.278.ge193c7cf3a9-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ