[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJr_XRDphUUTLbXJbbVGjaQ-HBQ59DioQs_0PSVvmLpGiHk1FQ@mail.gmail.com>
Date: Thu, 7 Dec 2023 09:47:14 +0800
From: Rain River <rain.1986.08.12@...il.com>
To: Zhu Yanjun <yanjun.zhu@...ux.dev>
Cc: Bob Pearson <rpearsonhpe@...il.com>, jgg@...dia.com, linux-rdma@...r.kernel.org,
dsahern@...nel.org, davem@...emloft.net, netdev@...r.kernel.org
Subject: Re: [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address
On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@...ux.dev> wrote:
>
>
> 在 2023/12/5 13:55, Zhu Yanjun 写道:
> > Adding David S. Miller and David Ahern to Cc.
> >
> > They are netdev maintainers and are very familiar with mcast.
> >
> > Zhu Yanjun
> >
> > 在 2023/12/5 8:26, Bob Pearson 写道:
> >> Currently the rdma_rxe driver does not receive mcast packets at all.
> >>
> >> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister
> >> the IP mcast address. This is required for mcast traffic to reach the
> >> rxe driver when coming from an external source.
> >>
> >> Fixes: 8700e3e7c485 ("Soft RoCE driver")
> >> Signed-off-by: Bob Pearson <rpearsonhpe@...il.com>
> >> ---
> >> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++-----
> >> drivers/infiniband/sw/rxe/rxe_net.c | 2 +-
> >> drivers/infiniband/sw/rxe/rxe_net.h | 1 +
> >> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 +
> >> 4 files changed, 102 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c
> >> b/drivers/infiniband/sw/rxe/rxe_mcast.c
> >> index 86cc2e18a7fd..54735d07cee5 100644
> >> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> >> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> >> @@ -19,38 +19,116 @@
> >> * mcast packets in the rxe receive path.
> >> */
> >> +#include <linux/igmp.h>
> >> +
> >> #include "rxe.h"
> >> -/**
> >> - * rxe_mcast_add - add multicast address to rxe device
> >> - * @rxe: rxe device object
> >> - * @mgid: multicast address as a gid
> >> - *
> >> - * Returns 0 on success else an error
> >> - */
> >> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
> >> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid)
> >> {
> >> + struct in6_addr *addr6 = (struct in6_addr *)mgid;
> >> + struct sock *sk = recv_sockets.sk6->sk;
> >> unsigned char ll_addr[ETH_ALEN];
> >> + int err;
> >> +
> >> + spin_lock_bh(&sk->sk_lock.slock);
> >> + rtnl_lock();
> >> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6);
>
>
> Normally sk_lock is used. I am not sure whether spin_lock_bh() is correct here.
./net/ipv6/addrconf.c-2915- lock_sock(sk);
./net/ipv6/addrconf.c-2916- if (join)
./net/ipv6/addrconf.c:2917: ret = ipv6_sock_mc_join(sk, ifindex, addr);
./net/ipv6/addrconf.c-2918- else
./net/ipv6/addrconf.c-2919- ret = ipv6_sock_mc_drop(sk, ifindex, addr);
./net/ipv6/addrconf.c-2920- release_sock(sk);
Should this be lock_sock()/release_sock() instead of spin_lock_bh() on sk_lock.slock, as in the addrconf.c code above?
>
> Jason or experts from netdev, please comment on this.
>
> Thanks,
>
> Zhu Yanjun
>
>
> >> + rtnl_unlock();
> >> + spin_unlock_bh(&sk->sk_lock.slock);
> >> + if (err && err != -EADDRINUSE)
> >> + goto err_out;
> >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
> >> + err = dev_mc_add(rxe->ndev, ll_addr);
> >> + if (err)
> >> + goto err_drop;
> >> +
> >> + return 0;
> >> - return dev_mc_add(rxe->ndev, ll_addr);
> >> +err_drop:
> >> + spin_lock_bh(&sk->sk_lock.slock);
> >> + rtnl_lock();
> >> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6);
> >> + rtnl_unlock();
> >> + spin_unlock_bh(&sk->sk_lock.slock);
> >> +err_out:
> >> + return err;
> >> }
> >> -/**
> >> - * rxe_mcast_del - delete multicast address from rxe device
> >> - * @rxe: rxe device object
> >> - * @mgid: multicast address as a gid
> >> - *
> >> - * Returns 0 on success else an error
> >> - */
> >> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
> >> +static int rxe_mcast_add(struct rxe_mcg *mcg)
> >> {
> >> + struct rxe_dev *rxe = mcg->rxe;
> >> + union ib_gid *mgid = &mcg->mgid;
> >> unsigned char ll_addr[ETH_ALEN];
> >> + struct ip_mreqn imr = {};
> >> + int err;
> >> +
> >> + if (mcg->is_ipv6)
> >> + return rxe_mcast_add6(rxe, mgid);
> >> +
> >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
> >> + imr.imr_ifindex = rxe->ndev->ifindex;
> >> + rtnl_lock();
> >> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr);
> >> + rtnl_unlock();
> >> + if (err && err != -EADDRINUSE)
> >> + goto err_out;
> >> +
> >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
> >> + err = dev_mc_add(rxe->ndev, ll_addr);
> >> + if (err)
> >> + goto err_leave;
> >> +
> >> + return 0;
> >> +
> >> +err_leave:
> >> + rtnl_lock();
> >> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
> >> + rtnl_unlock();
> >> +err_out:
> >> + return err;
> >> +}
> >> +
> >> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid)
> >> +{
> >> + struct sock *sk = recv_sockets.sk6->sk;
> >> + unsigned char ll_addr[ETH_ALEN];
> >> + int err, err2;
> >> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
> >> + err = dev_mc_del(rxe->ndev, ll_addr);
> >> +
> >> + spin_lock_bh(&sk->sk_lock.slock);
> >> + rtnl_lock();
> >> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex,
> >> + (struct in6_addr *)mgid);
> >> + rtnl_unlock();
> >> + spin_unlock_bh(&sk->sk_lock.slock);
> >> +
> >> + return err ?: err2;
> >> +}
> >> +
> >> +static int rxe_mcast_del(struct rxe_mcg *mcg)
> >> +{
> >> + struct rxe_dev *rxe = mcg->rxe;
> >> + union ib_gid *mgid = &mcg->mgid;
> >> + unsigned char ll_addr[ETH_ALEN];
> >> + struct ip_mreqn imr = {};
> >> + int err, err2;
> >> +
> >> + if (mcg->is_ipv6)
> >> + return rxe_mcast_del6(rxe, mgid);
> >> +
> >> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
> >> + imr.imr_ifindex = rxe->ndev->ifindex;
> >> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
> >> + err = dev_mc_del(rxe->ndev, ll_addr);
> >> +
> >> + rtnl_lock();
> >> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
> >> + rtnl_unlock();
> >> - return dev_mc_del(rxe->ndev, ll_addr);
> >> + return err ?: err2;
> >> }
> >> /**
> >> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe,
> >> union ib_gid *mgid,
> >> {
> >> kref_init(&mcg->ref_cnt);
> >> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
> >> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid);
> >> INIT_LIST_HEAD(&mcg->qp_list);
> >> mcg->rxe = rxe;
> >> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct
> >> rxe_dev *rxe, union ib_gid *mgid)
> >> spin_unlock_bh(&rxe->mcg_lock);
> >> /* add mcast address outside of lock */
> >> - err = rxe_mcast_add(rxe, mgid);
> >> + err = rxe_mcast_add(mcg);
> >> if (!err)
> >> return mcg;
> >> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
> >> static void rxe_destroy_mcg(struct rxe_mcg *mcg)
> >> {
> >> /* delete mcast address outside of lock */
> >> - rxe_mcast_del(mcg->rxe, &mcg->mgid);
> >> + rxe_mcast_del(mcg);
> >> spin_lock_bh(&mcg->rxe->mcg_lock);
> >> __rxe_destroy_mcg(mcg);
> >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c
> >> b/drivers/infiniband/sw/rxe/rxe_net.c
> >> index 58c3f3759bf0..b481f8da2002 100644
> >> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> >> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> >> @@ -18,7 +18,7 @@
> >> #include "rxe_net.h"
> >> #include "rxe_loc.h"
> >> -static struct rxe_recv_sockets recv_sockets;
> >> +struct rxe_recv_sockets recv_sockets;
> >> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
> >> struct net_device *ndev,
> >> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h
> >> b/drivers/infiniband/sw/rxe/rxe_net.h
> >> index 45d80d00f86b..89cee7d5340f 100644
> >> --- a/drivers/infiniband/sw/rxe/rxe_net.h
> >> +++ b/drivers/infiniband/sw/rxe/rxe_net.h
> >> @@ -15,6 +15,7 @@ struct rxe_recv_sockets {
> >> struct socket *sk4;
> >> struct socket *sk6;
> >> };
> >> +extern struct rxe_recv_sockets recv_sockets;
> >> int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
> >> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h
> >> b/drivers/infiniband/sw/rxe/rxe_verbs.h
> >> index ccb9d19ffe8a..7be9e6232dd9 100644
> >> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> >> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> >> @@ -352,6 +352,7 @@ struct rxe_mcg {
> >> atomic_t qp_num;
> >> u32 qkey;
> >> u16 pkey;
> >> + bool is_ipv6;
> >> };
> >> struct rxe_mca {
>
Powered by blists - more mailing lists