[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iKEL4ZhL0BkRiY+5vnUQ6vC=eJ=J+gGFg6+CJ7QL8oOjQ@mail.gmail.com>
Date: Wed, 13 Dec 2023 15:55:41 +0100
From: Eric Dumazet <edumazet@...gle.com>
To: Leone Fernando <leone4fernando@...il.com>
Cc: davem@...emloft.net, dsahern@...nel.org, kuba@...nel.org,
pabeni@...hat.com, willemdebruijn.kernel@...il.com,
netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH net] ipmr: support IP_PKTINFO on cache report IGMP msg
On Wed, Dec 13, 2023 at 3:35 PM Leone Fernando <leone4fernando@...il.com> wrote:
>
> In order to support IP_PKTINFO on those packets, we need to call
> ipv4_pktinfo_prepare(), so this patch introduces minor changes to that
> function to support this flow.
>
> When sending mrouted/pimd daemons a cache report IGMP msg, it is
> unnecessary to set dst on the newly created skb.
> It used to be necessary on older versions until
> commit d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference") which
> changed the way the IP_PKTINFO struct is retrieved.
>
Given this is a 12-year-old bug, I would rather target the net-next tree.
> Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference")
> Signed-off-by: Leone Fernando <leone4fernando@...il.com>
> ---
> include/net/ip.h | 10 +++++++++-
> net/ipv4/ip_sockglue.c | 25 ++++++++++++++++---------
> net/ipv4/ipmr.c | 12 +++++-------
> net/ipv4/raw.c | 2 +-
> net/ipv4/udp.c | 2 +-
> 5 files changed, 32 insertions(+), 19 deletions(-)
>
> diff --git a/include/net/ip.h b/include/net/ip.h
> index b31be912489a..1b40b7386c56 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -767,7 +767,15 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
> * Functions provided by ip_sockglue.c
> */
>
> -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb);
> +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb,
> + struct sk_buff *oskb);
> +
> +
> +static inline void ipv4_pktinfo_input_prepare(const struct sock *sk, struct sk_buff *skb)
> +{
> + ipv4_pktinfo_prepare(sk, skb, NULL);
> +}
> +
> void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
> struct sk_buff *skb, int tlen, int offset);
> int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index d7d13940774e..fb26963e3869 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -1364,19 +1364,26 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
> /**
> * ipv4_pktinfo_prepare - transfer some info from rtable to skb
> * @sk: socket
> - * @skb: buffer
> + * @iskb: input buffer
> + * @oskb: out buffer
> *
> * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
> * destination in skb->cb[] before dst drop.
> * This way, receiver doesn't make cache line misses to read rtable.
> */
> -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
> +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb,
> + struct sk_buff *oskb)
This looks more complicated than needed.
I am pretty sure we can fix the bug without touching this function...
> {
> - struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
> + struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(iskb);
> bool prepare = inet_test_bit(PKTINFO, sk) ||
> ipv6_sk_rxinfo(sk);
>
> - if (prepare && skb_rtable(skb)) {
> + if (oskb) {
> + memcpy(oskb->cb, iskb->cb, sizeof(iskb->cb));
> + pktinfo = PKTINFO_SKB_CB(oskb);
> + }
> +
> + if (prepare && skb_rtable(iskb)) {
> /* skb->cb is overloaded: prior to this point it is IP{6}CB
> * which has interface index (iif) as the first member of the
> * underlying inet{6}_skb_parm struct. This code then overlays
> @@ -1386,20 +1393,20 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
> * (e.g., process binds socket to eth0 for Tx which is
> * redirected to loopback in the rtable/dst).
> */
> - struct rtable *rt = skb_rtable(skb);
> - bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
> + struct rtable *rt = skb_rtable(iskb);
> + bool l3slave = ipv4_l3mdev_skb(IPCB(iskb)->flags);
>
> if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
> - pktinfo->ipi_ifindex = inet_iif(skb);
> + pktinfo->ipi_ifindex = inet_iif(iskb);
> else if (l3slave && rt && rt->rt_iif)
> pktinfo->ipi_ifindex = rt->rt_iif;
>
> - pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
> + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(iskb);
> } else {
> pktinfo->ipi_ifindex = 0;
> pktinfo->ipi_spec_dst.s_addr = 0;
> }
> - skb_dst_drop(skb);
> + skb_dst_drop(iskb);
> }
>
> int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 9e222a57bc2b..6ed7c88743f9 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -1025,6 +1025,10 @@ static int ipmr_cache_report(const struct mr_table *mrt,
> struct sk_buff *skb;
> int ret;
>
> + mroute_sk = rcu_dereference(mrt->mroute_sk);
> + if (!mroute_sk)
> + return -EINVAL;
> +
> if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
> skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
> else
> @@ -1069,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt,
> msg = (struct igmpmsg *)skb_network_header(skb);
> msg->im_vif = vifi;
> msg->im_vif_hi = vifi >> 8;
> - skb_dst_set(skb, dst_clone(skb_dst(pkt)));
> + ipv4_pktinfo_prepare(mroute_sk, pkt, skb);
All we need is to call ipv4_pktinfo_prepare(sk, pkt);
then copy pkt->cb to skb->cb ?
> /* Add our header */
> igmp = skb_put(skb, sizeof(struct igmphdr));
> igmp->type = assert;
> @@ -1079,12 +1083,6 @@ static int ipmr_cache_report(const struct mr_table *mrt,
> skb->transport_header = skb->network_header;
> }
>
> - mroute_sk = rcu_dereference(mrt->mroute_sk);
> - if (!mroute_sk) {
> - kfree_skb(skb);
> - return -EINVAL;
> - }
> -
> igmpmsg_netlink_event(mrt, skb);
>
> /* Deliver to mrouted */
> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
> index 27da9d7294c0..cde60c8deed4 100644
> --- a/net/ipv4/raw.c
> +++ b/net/ipv4/raw.c
> @@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
>
> /* Charge it to the socket. */
>
> - ipv4_pktinfo_prepare(sk, skb);
> + ipv4_pktinfo_input_prepare(sk, skb);
> if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
> kfree_skb_reason(skb, reason);
> return NET_RX_DROP;
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 89e5a806b82e..3e5a418c96c3 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2169,7 +2169,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
>
> udp_csum_pull_header(skb);
>
> - ipv4_pktinfo_prepare(sk, skb);
> + ipv4_pktinfo_input_prepare(sk, skb);
> return __udp_queue_rcv_skb(sk, skb);
>
> csum_error:
> --
> 2.34.1
>
Powered by blists - more mailing lists