[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <02031003-872e-49bf-a658-c22bc7e1a954@linux.dev>
Date: Tue, 14 Jan 2025 17:17:20 -0800
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: Jason Xing <kerneljasonxing@...il.com>
Cc: davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org,
pabeni@...hat.com, dsahern@...nel.org, willemdebruijn.kernel@...il.com,
willemb@...gle.com, ast@...nel.org, daniel@...earbox.net, andrii@...nel.org,
eddyz87@...il.com, song@...nel.org, yonghong.song@...ux.dev,
john.fastabend@...il.com, kpsingh@...nel.org, sdf@...ichev.me,
haoluo@...gle.com, jolsa@...nel.org, horms@...nel.org, bpf@...r.kernel.org,
netdev@...r.kernel.org
Subject: Re: [PATCH net-next v5 03/15] bpf: introduce timestamp_used to allow
UDP socket fetched in bpf prog
On 1/12/25 3:37 AM, Jason Xing wrote:
> timestamp_used consists of two parts, one is is_fullsock, the other
> one is for UDP socket which will be support in the next round.
>
> Signed-off-by: Jason Xing <kerneljasonxing@...il.com>
> ---
> include/linux/filter.h | 1 +
> net/core/filter.c | 4 ++--
> net/core/sock.c | 1 +
> net/ipv4/tcp_input.c | 2 ++
> net/ipv4/tcp_output.c | 2 ++
> 5 files changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index a3ea46281595..daca3fe48b8f 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -1508,6 +1508,7 @@ struct bpf_sock_ops_kern {
> void *skb_data_end;
> u8 op;
> u8 is_fullsock;
> + u8 timestamp_used;
> u8 remaining_opt_len;
> u64 temp; /* temp and everything after is not
> * initialized to 0 before calling
> diff --git a/net/core/filter.c b/net/core/filter.c
> index c6dd2d2e44c8..1ac996ec5e0f 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -10424,10 +10424,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
> } \
> *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
> struct bpf_sock_ops_kern, \
> - is_fullsock), \
> + timestamp_used), \
> fullsock_reg, si->src_reg, \
> offsetof(struct bpf_sock_ops_kern, \
> - is_fullsock)); \
> + timestamp_used)); \
hmm... I don't think this is the right change. It may prevent the bpf
prog from reading skops->sk. It is fine to allow a bpf prog (including the new
timestamp callback) to get skops->sk as long as skops->sk is a fullsock.
What actually needs to be addressed is writing to sk, like:
case offsetof(struct bpf_sock_ops, sk_txhash):
SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
struct sock, type);
and also all the SOCK_OPS_GET_TCP_SOCK_FIELD() cases, to prepare for the udp
sock support. After this patch 3, I think I am starting to understand the
udp/fullsock discussion in patch 2. is_fullsock here does not mean it is tcp,
although it is always a tcp_sock now. It literally means it is a full
"struct sock". The verifier will treat the skops->sk as "struct sock" instead
of "struct tcp_sock".
> *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
> if (si->dst_reg == si->src_reg) \
> *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
> diff --git a/net/core/sock.c b/net/core/sock.c
> index e06bcafb1b2d..dbb9326ae9d1 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -958,6 +958,7 @@ void bpf_skops_tx_timestamping(struct sock *sk, struct sk_buff *skb, int op)
> if (sk_is_tcp(sk) && sk_fullsock(sk))
> sock_ops.is_fullsock = 1;
> sock_ops.sk = sk;
> + sock_ops.timestamp_used = 1;
> __cgroup_bpf_run_filter_sock_ops(sk, &sock_ops, CGROUP_SOCK_OPS);
> }
> #endif
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 4811727b8a02..cad41ad34bd5 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -169,6 +169,7 @@ static void bpf_skops_parse_hdr(struct sock *sk, struct sk_buff *skb)
> memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> sock_ops.op = BPF_SOCK_OPS_PARSE_HDR_OPT_CB;
> sock_ops.is_fullsock = 1;
> + sock_ops.timestamp_used = 1;
> sock_ops.sk = sk;
> bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb));
>
> @@ -185,6 +186,7 @@ static void bpf_skops_established(struct sock *sk, int bpf_op,
> memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
> sock_ops.op = bpf_op;
> sock_ops.is_fullsock = 1;
> + sock_ops.timestamp_used = 1;
> sock_ops.sk = sk;
> /* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
> if (skb)
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 0e5b9a654254..7b4d1dfd57d4 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -522,6 +522,7 @@ static void bpf_skops_hdr_opt_len(struct sock *sk, struct sk_buff *skb,
> sock_owned_by_me(sk);
>
> sock_ops.is_fullsock = 1;
> + sock_ops.timestamp_used = 1;
> sock_ops.sk = sk;
> }
>
> @@ -567,6 +568,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
> sock_owned_by_me(sk);
>
> sock_ops.is_fullsock = 1;
> + sock_ops.timestamp_used = 1;
The "timestamp_used = 1;" assignment is missing in some places, at least in
tcp_call_bpf().
Also, the name "timestamp_used" is confusing. For example, it is set in the
bpf_skops_*_hdr_opt() callbacks here even though they are not timestamp
callbacks. Altogether, we need to rethink what to add to sock_ops instead of
timestamp_used, and the new field should be checked in "some" of the
SOCK_OPS_*_FIELD() macros. A quick thought (not 100% sure) is to add
"u8 allow_direct_access" which is only set for the existing sockops callbacks.
[ I will continue the rest later. ]
> sock_ops.sk = sk;
> }
>
Powered by blists - more mailing lists