lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241029144142.31382-1-annaemesenyiri@gmail.com>
Date: Tue, 29 Oct 2024 15:41:40 +0100
From: Anna Emese Nyiri <annaemesenyiri@...il.com>
To: netdev@...r.kernel.org
Cc: fejes@....elte.hu,
	annaemesenyiri@...il.com
Subject: [PATCH net-next] support SO_PRIORITY cmsg

The Linux socket API currently supports setting SO_PRIORITY at the socket
level, which applies a uniform priority to all packets sent through that
socket. The only exception is IP_TOS: if that is specified as ancillary
data, the packet does not inherit the socket's priority. Instead, the
priority value is computed when handling the ancillary data (as implemented
in commit <f02db315b8d888570cb0d4496cfbb7e4acb047cb>: "ipv4: IP_TOS
and IP_TTL can be specified as ancillary data").

Currently, there is no option to set the priority directly from userspace
on a per-packet basis. The following changes allow SO_PRIORITY to be set
through control messages (CMSG), giving userspace applications more
granular control over packet priorities.

This patch enables setting skb->priority using CMSG. If SO_PRIORITY is
specified as ancillary data, the packet is sent with the priority value
set through sockc->priority_cmsg_value, overriding the socket-level
values set via the traditional setsockopt() method. This is analogous to
existing support for SO_MARK (as implemented in commit
<c6af0c227a22bb6bb8ff72f043e0fb6d99fd6515>: "ip: support SO_MARK
cmsg").

Suggested-by: Ferenc Fejes <fejes@....elte.hu>
Signed-off-by: Anna Emese Nyiri <annaemesenyiri@...il.com>
---
 include/net/inet_sock.h |  2 ++
 include/net/sock.h      |  5 ++++-
 net/can/raw.c           |  6 +++++-
 net/core/sock.c         | 12 ++++++++++++
 net/ipv4/ip_output.c    | 11 ++++++++++-
 net/ipv4/raw.c          |  5 ++++-
 net/ipv6/ip6_output.c   |  8 +++++++-
 net/ipv6/raw.c          |  6 +++++-
 net/packet/af_packet.c  |  6 +++++-
 9 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index f9ddd47dc4f8..9d4e4e2a8232 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -175,6 +175,8 @@ struct inet_cork {
 	__u16			gso_size;
 	u64			transmit_time;
 	u32			mark;
+	__u8		priority_cmsg_set;
+	u32			priority_cmsg_value;
 };
 
 struct inet_cork_full {
diff --git a/include/net/sock.h b/include/net/sock.h
index cce23ac4d514..e02170977165 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1794,13 +1794,16 @@ struct sockcm_cookie {
 	u64 transmit_time;
 	u32 mark;
 	u32 tsflags;
+	u32 priority_cmsg_value;
+	u8 priority_cmsg_set;
 };
 
 static inline void sockcm_init(struct sockcm_cookie *sockc,
 			       const struct sock *sk)
 {
 	*sockc = (struct sockcm_cookie) {
-		.tsflags = READ_ONCE(sk->sk_tsflags)
+		.tsflags = READ_ONCE(sk->sk_tsflags),
+		.priority_cmsg_set = 0
 	};
 }
 
diff --git a/net/can/raw.c b/net/can/raw.c
index 00533f64d69d..cf7e7ae64cde 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -962,7 +962,11 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	}
 
 	skb->dev = dev;
-	skb->priority = READ_ONCE(sk->sk_priority);
+	if (sockc.priority_cmsg_set)
+		skb->priority = sockc.priority_cmsg_value;
+	else
+		skb->priority = READ_ONCE(sk->sk_priority);
+
 	skb->mark = READ_ONCE(sk->sk_mark);
 	skb->tstamp = sockc.transmit_time;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 9abc4fe25953..899bf850b52a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2863,6 +2863,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
 	case SCM_RIGHTS:
 	case SCM_CREDENTIALS:
 		break;
+	case SO_PRIORITY:
+		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+			return -EINVAL;
+
+		if ((*(u32 *)CMSG_DATA(cmsg) >= 0 && *(u32 *)CMSG_DATA(cmsg) <= 6) ||
+		    sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
+		    sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+			sockc->priority_cmsg_value = *(u32 *)CMSG_DATA(cmsg);
+			sockc->priority_cmsg_set = 1;
+			break;
+		}
+		return -EPERM;
 	default:
 		return -EINVAL;
 	}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b90d0f78ac80..0e44ebd031f7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1322,6 +1322,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
 	cork->ttl = ipc->ttl;
 	cork->tos = ipc->tos;
 	cork->mark = ipc->sockc.mark;
+	cork->priority_cmsg_value = ipc->sockc.priority_cmsg_value;
+	cork->priority_cmsg_set = ipc->sockc.priority_cmsg_set;
 	cork->priority = ipc->priority;
 	cork->transmit_time = ipc->sockc.transmit_time;
 	cork->tx_flags = 0;
@@ -1455,8 +1457,15 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 		ip_options_build(skb, opt, cork->addr, rt);
 	}
 
-	skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
+	if (cork->tos != -1)
+		skb->priority = cork->priority;
+	else if (cork->priority_cmsg_set)
+		skb->priority = cork->priority_cmsg_value;
+	else
+		skb->priority = READ_ONCE(sk->sk_priority);
+
 	skb->mark = cork->mark;
+
 	if (sk_is_tcp(sk))
 		skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC);
 	else
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 474dfd263c8b..bbe481dc98a9 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -358,7 +358,10 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 	skb_reserve(skb, hlen);
 
 	skb->protocol = htons(ETH_P_IP);
-	skb->priority = READ_ONCE(sk->sk_priority);
+	if (sockc->priority_cmsg_set)
+		skb->priority = sockc->priority_cmsg_value;
+	else
+		skb->priority = READ_ONCE(sk->sk_priority);
 	skb->mark = sockc->mark;
 	skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
 	skb_dst_set(skb, &rt->dst);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f26841f1490f..4c4f4b76ef90 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1401,6 +1401,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 	cork->base.gso_size = ipc6->gso_size;
 	cork->base.tx_flags = 0;
 	cork->base.mark = ipc6->sockc.mark;
+	cork->base.priority_cmsg_set = ipc6->sockc.priority_cmsg_set;
+	cork->base.priority_cmsg_value = ipc6->sockc.priority_cmsg_value;
 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
 
 	cork->base.length = 0;
@@ -1931,7 +1933,11 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 	hdr->saddr = fl6->saddr;
 	hdr->daddr = *final_dst;
 
-	skb->priority = READ_ONCE(sk->sk_priority);
+	if (cork->base.priority_cmsg_set)
+		skb->priority = cork->base.priority_cmsg_value;
+	else
+		skb->priority = READ_ONCE(sk->sk_priority);
+
 	skb->mark = cork->base.mark;
 	if (sk_is_tcp(sk))
 		skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 608fa9d05b55..6944dc3ec4c9 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -619,7 +619,11 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 	skb_reserve(skb, hlen);
 
 	skb->protocol = htons(ETH_P_IPV6);
-	skb->priority = READ_ONCE(sk->sk_priority);
+	if (sockc->priority_cmsg_set)
+		skb->priority = sockc->priority_cmsg_value;
+	else
+		skb->priority = READ_ONCE(sk->sk_priority);
+
 	skb->mark = sockc->mark;
 	skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4a364cdd445e..8b7924f775a4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3125,7 +3125,11 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 
 	skb->protocol = proto;
 	skb->dev = dev;
-	skb->priority = READ_ONCE(sk->sk_priority);
+	if (sockc.priority_cmsg_set)
+		skb->priority = sockc.priority_cmsg_value;
+	else
+		skb->priority = READ_ONCE(sk->sk_priority);
+
 	skb->mark = sockc.mark;
 	skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
 
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ