[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d17da5b6-6273-4c2c-abd7-99378723866e@linux.dev>
Date: Fri, 13 Sep 2024 11:24:09 -0700
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: Tiago Lam <tiagolam@...udflare.com>
Cc: "David S. Miller" <davem@...emloft.net>, David Ahern
<dsahern@...nel.org>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
Willem de Bruijn <willemdebruijn.kernel@...il.com>,
Alexei Starovoitov <ast@...nel.org>, Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>, Eduard Zingerman <eddyz87@...il.com>,
Song Liu <song@...nel.org>, Yonghong Song <yonghong.song@...ux.dev>,
John Fastabend <john.fastabend@...il.com>, KP Singh <kpsingh@...nel.org>,
Stanislav Fomichev <sdf@...ichev.me>, Hao Luo <haoluo@...gle.com>,
Jiri Olsa <jolsa@...nel.org>, Mykola Lysenko <mykolal@...com>,
Shuah Khan <shuah@...nel.org>, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, bpf@...r.kernel.org,
linux-kselftest@...r.kernel.org, Jakub Sitnicki <jakub@...udflare.com>,
kernel-team@...udflare.com
Subject: Re: [RFC PATCH 2/3] ipv6: Run a reverse sk_lookup on sendmsg.
On 9/13/24 2:39 AM, Tiago Lam wrote:
> This follows the same rationale provided for the ipv4 counterpart, where
> it now runs a reverse socket lookup when source addresses and/or ports
> are changed, on sendmsg, to check whether egress traffic should be
> allowed to go through or not.
>
> As with ipv4, the ipv6 sendmsg path is also extended here to support the
> IPV6_ORIGDSTADDR ancillary message to be able to specify a source
> address/port.
>
> Suggested-by: Jakub Sitnicki <jakub@...udflare.com>
> Signed-off-by: Tiago Lam <tiagolam@...udflare.com>
> ---
> net/ipv6/datagram.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> net/ipv6/udp.c | 8 ++++--
> 2 files changed, 82 insertions(+), 2 deletions(-)
>
> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> index fff78496803d..4214dda1c320 100644
> --- a/net/ipv6/datagram.c
> +++ b/net/ipv6/datagram.c
> @@ -756,6 +756,27 @@ void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
> }
> EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
>
> +static inline bool reverse_sk_lookup(struct flowi6 *fl6, struct sock *sk,
> + struct in6_addr *saddr, __be16 sport)
> +{
> + if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
> + (saddr && sport) &&
> + (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, saddr) || inet_sk(sk)->inet_sport != sport)) {
> + struct sock *sk_egress;
> +
> + bpf_sk_lookup_run_v6(sock_net(sk), IPPROTO_UDP, &fl6->daddr, fl6->fl6_dport,
> + saddr, ntohs(sport), 0, &sk_egress);
IIRC, in the ingress path, the sk could also be selected by a tc bpf prog doing
bpf_sk_assign. In that case, re-running sk_lookup here may give an incorrect result?
In general, is it necessary to re-run any bpf prog if user space has
specified IP[v6]_ORIGDSTADDR?
> + if (!IS_ERR_OR_NULL(sk_egress) &&
> + atomic64_read(&sk_egress->sk_cookie) == atomic64_read(&sk->sk_cookie))
> + return true;
> +
> + net_info_ratelimited("No reverse socket lookup match for local addr %pI6:%d remote addr %pI6:%d\n",
> + &saddr, ntohs(sport), &fl6->daddr, ntohs(fl6->fl6_dport));
> + }
> +
> + return false;
> +}
> +
> int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
> struct msghdr *msg, struct flowi6 *fl6,
> struct ipcm6_cookie *ipc6)
> @@ -844,7 +865,62 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
>
> break;
> }
> + case IPV6_ORIGDSTADDR:
> + {
> + struct sockaddr_in6 *sockaddr_in;
> + struct net_device *dev = NULL;
> +
> + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct sockaddr_in6))) {
> + err = -EINVAL;
> + goto exit_f;
> + }
> +
> + sockaddr_in = (struct sockaddr_in6 *)CMSG_DATA(cmsg);
> +
> + addr_type = __ipv6_addr_type(&sockaddr_in->sin6_addr);
> +
> + if (addr_type & IPV6_ADDR_LINKLOCAL)
> + return -EINVAL;
> +
> + /* If we're egressing with a different source address and/or port, we
> + * perform a reverse socket lookup. The rationale behind this is that we
> + * can allow return UDP traffic that has ingressed through sk_lookup to
> + * also egress correctly. In case the reverse lookup fails, we
> + * continue with the normal path.
> + *
> + * The lookup is performed if either source address and/or port changed, and
> + * neither is "0".
> + */
> + if (reverse_sk_lookup(fl6, sk, &sockaddr_in->sin6_addr,
> + sockaddr_in->sin6_port)) {
> + /* Override the source port and address to use with the one we
> + * got in cmsg and bail early.
> + */
> + fl6->saddr = sockaddr_in->sin6_addr;
> + fl6->fl6_sport = sockaddr_in->sin6_port;
> + break;
> + }
>
> + if (addr_type != IPV6_ADDR_ANY) {
> + int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
> +
> + if (!ipv6_can_nonlocal_bind(net, inet_sk(sk)) &&
> + !ipv6_chk_addr_and_flags(net,
> + &sockaddr_in->sin6_addr,
> + dev, !strict, 0,
> + IFA_F_TENTATIVE) &&
> + !ipv6_chk_acast_addr_src(net, dev,
> + &sockaddr_in->sin6_addr))
> + err = -EINVAL;
> + else
> + fl6->saddr = sockaddr_in->sin6_addr;
> + }
> +
> + if (err)
> + goto exit_f;
> +
> + break;
> + }
> case IPV6_FLOWINFO:
> if (cmsg->cmsg_len < CMSG_LEN(4)) {
> err = -EINVAL;
> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index 6602a2e9cdb5..6121cbb71ad3 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
> @@ -1476,6 +1476,12 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>
> fl6->flowi6_uid = sk->sk_uid;
>
> + /* We use fl6's daddr and fl6_sport in the reverse sk_lookup done
> + * within ip6_datagram_send_ctl() now.
> + */
> + fl6->daddr = *daddr;
> + fl6->fl6_sport = inet->inet_sport;
> +
> if (msg->msg_controllen) {
> opt = &opt_space;
> memset(opt, 0, sizeof(struct ipv6_txoptions));
> @@ -1511,10 +1517,8 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>
> fl6->flowi6_proto = sk->sk_protocol;
> fl6->flowi6_mark = ipc6.sockc.mark;
> - fl6->daddr = *daddr;
> if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr))
> fl6->saddr = np->saddr;
> - fl6->fl6_sport = inet->inet_sport;
>
> if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
> err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
>
Powered by blists - more mailing lists