[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241029144142.31382-1-annaemesenyiri@gmail.com>
Date: Tue, 29 Oct 2024 15:41:40 +0100
From: Anna Emese Nyiri <annaemesenyiri@...il.com>
To: netdev@...r.kernel.org
Cc: fejes@....elte.hu,
annaemesenyiri@...il.com
Subject: [PATCH net-next] support SO_PRIORITY cmsg
The Linux socket API currently supports setting SO_PRIORITY at the socket
level, which applies a uniform priority to all packets sent through that
socket. The only exception is IP_TOS: if it is specified as ancillary
data, the packet does not inherit the socket's priority. Instead, the
priority value is computed when handling the ancillary data (as implemented
in commit f02db315b8d888570cb0d4496cfbb7e4acb047cb ("ipv4: IP_TOS and
IP_TTL can be specified as ancillary data")).
Currently, there is no option to set the priority directly from userspace
on a per-packet basis. The following changes allow SO_PRIORITY to be set
through control messages (CMSG), giving userspace applications more
granular control over packet priorities.
This patch enables setting skb->priority using CMSG. If SO_PRIORITY is
specified as ancillary data, the packet is sent with the priority value
set through sockc->priority_cmsg_value, overriding the socket-level
value set via the traditional setsockopt() method. This is analogous to
the existing support for SO_MARK (as implemented in commit
c6af0c227a22bb6bb8ff72f043e0fb6d99fd6515 ("ip: support SO_MARK cmsg")).
Suggested-by: Ferenc Fejes <fejes@....elte.hu>
Signed-off-by: Anna Emese Nyiri <annaemesenyiri@...il.com>
---
include/net/inet_sock.h | 2 ++
include/net/sock.h | 5 ++++-
net/can/raw.c | 6 +++++-
net/core/sock.c | 12 ++++++++++++
net/ipv4/ip_output.c | 11 ++++++++++-
net/ipv4/raw.c | 5 ++++-
net/ipv6/ip6_output.c | 8 +++++++-
net/ipv6/raw.c | 6 +++++-
net/packet/af_packet.c | 6 +++++-
9 files changed, 54 insertions(+), 7 deletions(-)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index f9ddd47dc4f8..9d4e4e2a8232 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -175,6 +175,8 @@ struct inet_cork {
__u16 gso_size;
u64 transmit_time;
u32 mark;
+ __u8 priority_cmsg_set;
+ u32 priority_cmsg_value;
};
struct inet_cork_full {
diff --git a/include/net/sock.h b/include/net/sock.h
index cce23ac4d514..e02170977165 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1794,13 +1794,16 @@ struct sockcm_cookie {
u64 transmit_time;
u32 mark;
u32 tsflags;
+ u32 priority_cmsg_value;
+ u8 priority_cmsg_set;
};
static inline void sockcm_init(struct sockcm_cookie *sockc,
const struct sock *sk)
{
*sockc = (struct sockcm_cookie) {
- .tsflags = READ_ONCE(sk->sk_tsflags)
+ .tsflags = READ_ONCE(sk->sk_tsflags),
+ .priority_cmsg_set = 0
};
}
diff --git a/net/can/raw.c b/net/can/raw.c
index 00533f64d69d..cf7e7ae64cde 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -962,7 +962,11 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
}
skb->dev = dev;
- skb->priority = READ_ONCE(sk->sk_priority);
+ if (sockc.priority_cmsg_set)
+ skb->priority = sockc.priority_cmsg_value;
+ else
+ skb->priority = READ_ONCE(sk->sk_priority);
+
skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;
diff --git a/net/core/sock.c b/net/core/sock.c
index 9abc4fe25953..899bf850b52a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2863,6 +2863,18 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
case SCM_RIGHTS:
case SCM_CREDENTIALS:
break;
+ case SO_PRIORITY:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+ return -EINVAL;
+
+ if ((*(u32 *)CMSG_DATA(cmsg) >= 0 && *(u32 *)CMSG_DATA(cmsg) <= 6) ||
+ sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
+ sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ sockc->priority_cmsg_value = *(u32 *)CMSG_DATA(cmsg);
+ sockc->priority_cmsg_set = 1;
+ break;
+ }
+ return -EPERM;
default:
return -EINVAL;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b90d0f78ac80..0e44ebd031f7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1322,6 +1322,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->ttl = ipc->ttl;
cork->tos = ipc->tos;
cork->mark = ipc->sockc.mark;
+ cork->priority_cmsg_value = ipc->sockc.priority_cmsg_value;
+ cork->priority_cmsg_set = ipc->sockc.priority_cmsg_set;
cork->priority = ipc->priority;
cork->transmit_time = ipc->sockc.transmit_time;
cork->tx_flags = 0;
@@ -1455,8 +1457,15 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
ip_options_build(skb, opt, cork->addr, rt);
}
- skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
+ if (cork->tos != -1)
+ skb->priority = cork->priority;
+ else if (cork->priority_cmsg_set)
+ skb->priority = cork->priority_cmsg_value;
+ else
+ skb->priority = READ_ONCE(sk->sk_priority);
+
skb->mark = cork->mark;
+
if (sk_is_tcp(sk))
skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC);
else
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 474dfd263c8b..bbe481dc98a9 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -358,7 +358,10 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb_reserve(skb, hlen);
skb->protocol = htons(ETH_P_IP);
- skb->priority = READ_ONCE(sk->sk_priority);
+ if (sockc->priority_cmsg_set)
+ skb->priority = sockc->priority_cmsg_value;
+ else
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
skb_dst_set(skb, &rt->dst);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f26841f1490f..4c4f4b76ef90 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1401,6 +1401,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
cork->base.mark = ipc6->sockc.mark;
+ cork->base.priority_cmsg_set = ipc6->sockc.priority_cmsg_set;
+ cork->base.priority_cmsg_value = ipc6->sockc.priority_cmsg_value;
sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
cork->base.length = 0;
@@ -1931,7 +1933,11 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
hdr->saddr = fl6->saddr;
hdr->daddr = *final_dst;
- skb->priority = READ_ONCE(sk->sk_priority);
+ if (cork->base.priority_cmsg_set)
+ skb->priority = cork->base.priority_cmsg_value;
+ else
+ skb->priority = READ_ONCE(sk->sk_priority);
+
skb->mark = cork->base.mark;
if (sk_is_tcp(sk))
skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 608fa9d05b55..6944dc3ec4c9 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -619,7 +619,11 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb_reserve(skb, hlen);
skb->protocol = htons(ETH_P_IPV6);
- skb->priority = READ_ONCE(sk->sk_priority);
+ if (sockc->priority_cmsg_set)
+ skb->priority = sockc->priority_cmsg_value;
+ else
+ skb->priority = READ_ONCE(sk->sk_priority);
+
skb->mark = sockc->mark;
skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4a364cdd445e..8b7924f775a4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3125,7 +3125,11 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->protocol = proto;
skb->dev = dev;
- skb->priority = READ_ONCE(sk->sk_priority);
+ if (sockc.priority_cmsg_set)
+ skb->priority = sockc.priority_cmsg_value;
+ else
+ skb->priority = READ_ONCE(sk->sk_priority);
+
skb->mark = sockc.mark;
skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
--
2.43.0
Powered by blists - more mailing lists