[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20241012040651.95616-2-kerneljasonxing@gmail.com>
Date: Sat, 12 Oct 2024 12:06:40 +0800
From: Jason Xing <kerneljasonxing@...il.com>
To: davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
dsahern@...nel.org,
willemdebruijn.kernel@...il.com,
willemb@...gle.com,
ast@...nel.org,
daniel@...earbox.net,
andrii@...nel.org,
martin.lau@...ux.dev,
eddyz87@...il.com,
song@...nel.org,
yonghong.song@...ux.dev,
john.fastabend@...il.com,
kpsingh@...nel.org,
sdf@...ichev.me,
haoluo@...gle.com,
jolsa@...nel.org
Cc: bpf@...r.kernel.org,
netdev@...r.kernel.org,
Jason Xing <kernelxing@...cent.com>
Subject: [PATCH net-next v2 01/12] net-timestamp: introduce socket tsflag requestors
From: Jason Xing <kernelxing@...cent.com>
We need a separate tsflag to control bpf extension feature so that
we will not affect the behaviors of existing applications.
The idea of introducing requestors for better extension (not only
serving bpf extension) comes from Vadim Fedorenko.
Signed-off-by: Jason Xing <kernelxing@...cent.com>
---
include/net/ip.h | 2 +-
include/net/sock.h | 15 +++++++++++----
net/can/j1939/socket.c | 2 +-
net/core/skbuff.c | 5 +++--
net/core/sock.c | 8 ++++----
net/ipv4/ip_output.c | 2 +-
net/ipv4/ip_sockglue.c | 2 +-
net/ipv4/tcp.c | 2 +-
net/ipv6/ip6_output.c | 2 +-
net/ipv6/ping.c | 2 +-
net/ipv6/raw.c | 2 +-
net/ipv6/udp.c | 2 +-
net/sctp/socket.c | 2 +-
net/socket.c | 4 ++--
14 files changed, 30 insertions(+), 22 deletions(-)
diff --git a/include/net/ip.h b/include/net/ip.h
index bab084df1567..b0a836aebc33 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -96,7 +96,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
ipcm_init(ipcm);
ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
- ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags);
+ ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
ipcm->protocol = inet->inet_num;
diff --git a/include/net/sock.h b/include/net/sock.h
index b32f1424ecc5..8cf278c957b3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -234,6 +234,13 @@ struct sock_common {
struct bpf_local_storage;
struct sk_filter;
+enum {
+ SOCKETOPT_TS_REQUESTOR = 0,
+ BPFPROG_TS_REQUESTOR,
+
+ __MAX_TS_REQUESTOR,
+};
+
/**
* struct sock - network layer representation of sockets
* @__sk_common: shared layout with inet_timewait_sock
@@ -444,7 +451,7 @@ struct sock {
socket_lock_t sk_lock;
u32 sk_reserved_mem;
int sk_forward_alloc;
- u32 sk_tsflags;
+ u32 sk_tsflags[__MAX_TS_REQUESTOR];
__cacheline_group_end(sock_write_rxtx);
__cacheline_group_begin(sock_write_tx);
@@ -1809,7 +1816,7 @@ static inline void sockcm_init(struct sockcm_cookie *sockc,
const struct sock *sk)
{
*sockc = (struct sockcm_cookie) {
- .tsflags = READ_ONCE(sk->sk_tsflags)
+ .tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR])
};
}
@@ -2617,7 +2624,7 @@ static inline void
sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
{
struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
- u32 tsflags = READ_ONCE(sk->sk_tsflags);
+ u32 tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
ktime_t kt = skb->tstamp;
/*
* generate control messages if
@@ -2652,7 +2659,7 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
SOF_TIMESTAMPING_RAW_HARDWARE)
if (sk->sk_flags & FLAGS_RECV_CMSGS ||
- READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
+ READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) & TSFLAGS_ANY)
__sock_recv_cmsgs(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
sock_write_timestamp(sk, skb->tstamp);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 305dd72c844c..8f5799930a93 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -996,7 +996,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
if (!(jsk->state & J1939_SOCK_ERRQUEUE))
return;
- tsflags = READ_ONCE(sk->sk_tsflags);
+ tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
switch (type) {
case J1939_ERRQUEUE_TX_ACK:
if (!(tsflags & SOF_TIMESTAMPING_TX_ACK))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 00afeb90c23a..ab0a59f1e14d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5490,7 +5490,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_info = tstype;
serr->opt_stats = opt_stats;
serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
- if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+ if (READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) &
+ SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk_is_tcp(sk))
serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
@@ -5551,7 +5552,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
- tsflags = READ_ONCE(sk->sk_tsflags);
+ tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
return;
diff --git a/net/core/sock.c b/net/core/sock.c
index 083d438d8b6f..52c8c5a5ba27 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -908,7 +908,7 @@ int sock_set_timestamping(struct sock *sk, int optname,
return -EINVAL;
if (val & SOF_TIMESTAMPING_OPT_ID &&
- !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
+ !(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR] & SOF_TIMESTAMPING_OPT_ID)) {
if (sk_is_tcp(sk)) {
if ((1 << sk->sk_state) &
(TCPF_CLOSE | TCPF_LISTEN))
@@ -932,7 +932,7 @@ int sock_set_timestamping(struct sock *sk, int optname,
return ret;
}
- WRITE_ONCE(sk->sk_tsflags, val);
+ WRITE_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR], val);
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
@@ -1797,7 +1797,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
* Don't change the beviour for the old case SO_TIMESTAMPING_OLD.
*/
if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
- v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+ v.timestamping.flags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
}
break;
@@ -2930,7 +2930,7 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
case SCM_TS_OPT_ID:
if (sk_is_tcp(sk))
return -EINVAL;
- tsflags = READ_ONCE(sk->sk_tsflags);
+ tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
return -EINVAL;
if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e5c55a95063d..ded504458d5d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1050,7 +1050,7 @@ static int __ip_append_data(struct sock *sk,
cork->length += length;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+ READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) & SOF_TIMESTAMPING_OPT_ID) {
if (cork->flags & IPCORK_TS_OPT_ID) {
tskey = cork->ts_opt_id;
} else {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index cf377377b52d..fac8f593c43a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -509,7 +509,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/
info = PKTINFO_SKB_CB(skb);
- if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
+ if (!(READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) & SOF_TIMESTAMPING_OPT_CMSG) ||
!info->ipi_ifindex)
return false;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 82cc4a5633ce..6c8968eb4427 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2245,7 +2245,7 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
struct scm_timestamping_internal *tss)
{
int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
- u32 tsflags = READ_ONCE(sk->sk_tsflags);
+ u32 tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
bool has_timestamping = false;
if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 205673179b3c..c983e0ca6f72 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1547,7 +1547,7 @@ static int __ip6_append_data(struct sock *sk,
}
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+ READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) & SOF_TIMESTAMPING_OPT_ID) {
if (cork->flags & IPCORK_TS_OPT_ID) {
tskey = cork->ts_opt_id;
} else {
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 88b3fcacd4f9..0080b7c3a475 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -119,7 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EINVAL;
ipcm6_init_sk(&ipc6, sk);
- ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_oif = oif;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8476a3944a88..cd02aa02d813 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -778,7 +778,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_uid = sk->sk_uid;
ipcm6_init(&ipc6);
- ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
ipc6.sockc.mark = fl6.flowi6_mark;
if (sin6) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 52dfbb2ff1a8..008cc0282338 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1349,7 +1349,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.gso_size = READ_ONCE(up->gso_size);
- ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
/* destination address check */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 078bcb3858c7..f66f21d6363e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -9463,7 +9463,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newsk->sk_type = sk->sk_type;
newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
newsk->sk_flags = sk->sk_flags;
- newsk->sk_tsflags = sk->sk_tsflags;
+ memcpy(newsk->sk_tsflags, sk->sk_tsflags, sizeof(u32) * __MAX_TS_REQUESTOR);
newsk->sk_no_check_tx = sk->sk_no_check_tx;
newsk->sk_no_check_rx = sk->sk_no_check_rx;
newsk->sk_reuse = sk->sk_reuse;
diff --git a/net/socket.c b/net/socket.c
index 3b1b65b9f471..24619a27909a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -845,7 +845,7 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
{
- bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
+ bool cycles = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) & SOF_TIMESTAMPING_BIND_PHC;
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
struct net_device *orig_dev;
ktime_t hwtstamp;
@@ -944,7 +944,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
}
memset(&tss, 0, sizeof(tss));
- tsflags = READ_ONCE(sk->sk_tsflags);
+ tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
if ((tsflags & SOF_TIMESTAMPING_SOFTWARE &&
(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE ||
skb_is_err_queue(skb) ||
--
2.37.3
Powered by blists - more mailing lists