[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9a821495-cac7-48d8-a2bc-1bd7ebeef23c@linux.dev>
Date: Tue, 29 Oct 2024 17:32:41 -0700
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: Jason Xing <kerneljasonxing@...il.com>
Cc: davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org,
pabeni@...hat.com, dsahern@...nel.org, willemdebruijn.kernel@...il.com,
willemb@...gle.com, ast@...nel.org, daniel@...earbox.net, andrii@...nel.org,
eddyz87@...il.com, song@...nel.org, yonghong.song@...ux.dev,
john.fastabend@...il.com, kpsingh@...nel.org, sdf@...ichev.me,
haoluo@...gle.com, jolsa@...nel.org, shuah@...nel.org, ykolal@...com,
bpf@...r.kernel.org, netdev@...r.kernel.org,
Jason Xing <kernelxing@...cent.com>
Subject: Re: [PATCH net-next v3 03/14] net-timestamp: open gate for
bpf_setsockopt/_getsockopt
On 10/28/24 4:05 AM, Jason Xing wrote:
> From: Jason Xing <kernelxing@...cent.com>
>
> For now, we support bpf_setsockopt to set or clear timestamps flags.
>
> Users can use something like this in bpf program to turn on the feature:
> flags = SOF_TIMESTAMPING_TX_SCHED;
> bpf_setsockopt(skops, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags));
> The specific use cases can be seen in the bpf selftest in this series.
>
> Later, I will support each flags one by one based on this.
>
> Signed-off-by: Jason Xing <kernelxing@...cent.com>
> ---
> include/net/sock.h | 4 ++--
> include/uapi/linux/net_tstamp.h | 7 +++++++
> net/core/filter.c | 7 +++++--
> net/core/sock.c | 34 ++++++++++++++++++++++++++-------
> net/ipv4/udp.c | 2 +-
> net/mptcp/sockopt.c | 2 +-
> net/socket.c | 2 +-
> 7 files changed, 44 insertions(+), 14 deletions(-)
>
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 5384f1e49f5c..062f405c744e 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1775,7 +1775,7 @@ static inline void skb_set_owner_edemux(struct sk_buff *skb, struct sock *sk)
> #endif
>
> int sk_setsockopt(struct sock *sk, int level, int optname,
> - sockptr_t optval, unsigned int optlen);
> + sockptr_t optval, unsigned int optlen, bool bpf_timetamping);
> int sock_setsockopt(struct socket *sock, int level, int op,
> sockptr_t optval, unsigned int optlen);
> int do_sock_setsockopt(struct socket *sock, bool compat, int level,
> @@ -1784,7 +1784,7 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
> int optname, sockptr_t optval, sockptr_t optlen);
>
> int sk_getsockopt(struct sock *sk, int level, int optname,
> - sockptr_t optval, sockptr_t optlen);
> + sockptr_t optval, sockptr_t optlen, bool bpf_timetamping);
> int sock_gettstamp(struct socket *sock, void __user *userstamp,
> bool timeval, bool time32);
> struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
> diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
> index 858339d1c1c4..0696699cf964 100644
> --- a/include/uapi/linux/net_tstamp.h
> +++ b/include/uapi/linux/net_tstamp.h
> @@ -49,6 +49,13 @@ enum {
> SOF_TIMESTAMPING_TX_SCHED | \
> SOF_TIMESTAMPING_TX_ACK)
>
> +#define SOF_TIMESTAMPING_BPF_SUPPPORTED_MASK (SOF_TIMESTAMPING_SOFTWARE | \
Hmm... so we are allowing it, but SOF_TIMESTAMPING_SOFTWARE won't do anything
(meaning that setting it and not setting it are both no-ops)?
> + SOF_TIMESTAMPING_TX_SCHED | \
> + SOF_TIMESTAMPING_TX_SOFTWARE | \
> + SOF_TIMESTAMPING_TX_ACK | \
> + SOF_TIMESTAMPING_OPT_ID | \
> + SOF_TIMESTAMPING_OPT_ID_TCP)
> +
> /**
> * struct so_timestamping - SO_TIMESTAMPING parameter
> *
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 58761263176c..dc8ecf899ced 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5238,6 +5238,9 @@ static int sol_socket_sockopt(struct sock *sk, int optname,
> break;
> case SO_BINDTODEVICE:
> break;
> + case SO_TIMESTAMPING_NEW:
How about only allowing bpf_setsockopt(SO_TIMESTAMPING_NEW) instead of
bpf_setsockopt(SO_TIMESTAMPING)? Would that solve the issue reported in v2?
> + case SO_TIMESTAMPING_OLD:
> + break;
> default:
> return -EINVAL;
> }
> @@ -5247,11 +5250,11 @@ static int sol_socket_sockopt(struct sock *sk, int optname,
> return -EINVAL;
> return sk_getsockopt(sk, SOL_SOCKET, optname,
> KERNEL_SOCKPTR(optval),
> - KERNEL_SOCKPTR(optlen));
> + KERNEL_SOCKPTR(optlen), true);
> }
>
> return sk_setsockopt(sk, SOL_SOCKET, optname,
> - KERNEL_SOCKPTR(optval), *optlen);
> + KERNEL_SOCKPTR(optval), *optlen, true);
> }
>
> static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 7f398bd07fb7..7e05748b1a06 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -941,6 +941,19 @@ int sock_set_timestamping(struct sock *sk, int optname,
> return 0;
> }
>
> +static int sock_set_timestamping_bpf(struct sock *sk,
> + struct so_timestamping timestamping)
> +{
> + u32 flags = timestamping.flags;
> +
> + if (flags & ~SOF_TIMESTAMPING_BPF_SUPPPORTED_MASK)
> + return -EINVAL;
> +
> + WRITE_ONCE(sk->sk_tsflags_bpf, flags);
I think it is cleaner to do "WRITE_ONCE(sk->sk_tsflags_bpf, flags);" directly in
sol_socket_sockopt() instead of adding "bool bpf_timestamping" to sk_setsockopt().
sk_tsflags_bpf is a separate u32 anyway, so there is not a lot of code to share.
The same goes for getsockopt.
[ will continue the remaining patches a little later ]
> +
> + return 0;
> +}
> +
> void sock_set_keepalive(struct sock *sk)
> {
> lock_sock(sk);
> @@ -1159,7 +1172,7 @@ static int sockopt_validate_clockid(__kernel_clockid_t value)
> */
>
> int sk_setsockopt(struct sock *sk, int level, int optname,
> - sockptr_t optval, unsigned int optlen)
> + sockptr_t optval, unsigned int optlen, bool bpf_timetamping)
> {
> struct so_timestamping timestamping;
> struct socket *sock = sk->sk_socket;
> @@ -1409,7 +1422,10 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
> memset(&timestamping, 0, sizeof(timestamping));
> timestamping.flags = val;
> }
> - ret = sock_set_timestamping(sk, optname, timestamping);
> + if (!bpf_timetamping)
> + ret = sock_set_timestamping(sk, optname, timestamping);
> + else
> + ret = sock_set_timestamping_bpf(sk, timestamping);
> break;
>
> case SO_RCVLOWAT:
> @@ -1626,7 +1642,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
> sockptr_t optval, unsigned int optlen)
> {
> return sk_setsockopt(sock->sk, level, optname,
> - optval, optlen);
> + optval, optlen, false);
> }
> EXPORT_SYMBOL(sock_setsockopt);
>
> @@ -1670,7 +1686,7 @@ static int groups_to_user(sockptr_t dst, const struct group_info *src)
> }
>
> int sk_getsockopt(struct sock *sk, int level, int optname,
> - sockptr_t optval, sockptr_t optlen)
> + sockptr_t optval, sockptr_t optlen, bool bpf_timetamping)
> {
> struct socket *sock = sk->sk_socket;
>
> @@ -1793,9 +1809,13 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
> * returning the flags when they were set through the same option.
> * Don't change the beviour for the old case SO_TIMESTAMPING_OLD.
> */
> - if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
> - v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
> - v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
> + if (!bpf_timetamping) {
> + if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
> + v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
> + v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
> + }
> + } else {
> + v.timestamping.flags = READ_ONCE(sk->sk_tsflags_bpf);
> }
> break;
>
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 0e24916b39d4..9a20af41e272 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2679,7 +2679,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
> int is_udplite = IS_UDPLITE(sk);
>
> if (level == SOL_SOCKET) {
> - err = sk_setsockopt(sk, level, optname, optval, optlen);
> + err = sk_setsockopt(sk, level, optname, optval, optlen, false);
>
> if (optname == SO_RCVBUF || optname == SO_RCVBUFFORCE) {
> sockopt_lock_sock(sk);
> diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
> index 505445a9598f..7b12cc2db136 100644
> --- a/net/mptcp/sockopt.c
> +++ b/net/mptcp/sockopt.c
> @@ -306,7 +306,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
> return PTR_ERR(ssk);
> }
>
> - ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen);
> + ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen, false);
> if (ret == 0) {
> if (optname == SO_REUSEPORT)
> sk->sk_reuseport = ssk->sk_reuseport;
> diff --git a/net/socket.c b/net/socket.c
> index 9a8e4452b9b2..4bdca39685a6 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -2385,7 +2385,7 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
>
> ops = READ_ONCE(sock->ops);
> if (level == SOL_SOCKET) {
> - err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
> + err = sk_getsockopt(sock->sk, level, optname, optval, optlen, false);
> } else if (unlikely(!ops->getsockopt)) {
> err = -EOPNOTSUPP;
> } else {
Powered by blists - more mailing lists