[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <488bcc6a-2198-4fbc-a12d-329a378d6ea2@linux.dev>
Date: Fri, 8 Dec 2023 21:52:36 +0800
From: Zhu Yanjun <yanjun.zhu@...ux.dev>
To: Bob Pearson <rpearsonhpe@...il.com>, jgg@...dia.com,
linux-rdma@...r.kernel.org, netdev@...r.kernel.org, dsahern@...nel.org,
rain.1986.08.12@...il.com
Subject: Re: [PATCH for-next v6 3/7] RDMA/rxe: Register IP mcast address
在 2023/12/8 3:29, Bob Pearson 写道:
> Currently the rdma_rxe driver does not receive mcast packets at all.
>
> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister
> the IP mcast address. This is required for mcast traffic to reach the
> rxe driver when coming from an external source.
>
> Fixes: 8700e3e7c485 ("Soft RoCE driver")
> Signed-off-by: Bob Pearson <rpearsonhpe@...il.com>
> ---
> drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++-----
> drivers/infiniband/sw/rxe/rxe_net.c | 2 +-
> drivers/infiniband/sw/rxe/rxe_net.h | 1 +
> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 +
> 4 files changed, 102 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
> index 86cc2e18a7fd..5236761892dd 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> @@ -19,38 +19,116 @@
> * mcast packets in the rxe receive path.
> */
>
> +#include <linux/igmp.h>
> +
> #include "rxe.h"
>
> -/**
> - * rxe_mcast_add - add multicast address to rxe device
> - * @rxe: rxe device object
> - * @mgid: multicast address as a gid
> - *
> - * Returns 0 on success else an error
> - */
> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid)
> {
> + struct in6_addr *addr6 = (struct in6_addr *)mgid;
> + struct sock *sk = recv_sockets.sk6->sk;
> unsigned char ll_addr[ETH_ALEN];
> + int err;
> +
> + lock_sock(sk);
> + rtnl_lock();
> + err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6);
> + rtnl_unlock();
> + release_sock(sk);
> + if (err && err != -EADDRINUSE)
> + goto err_out;
>
> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
> + err = dev_mc_add(rxe->ndev, ll_addr);
> + if (err)
> + goto err_drop;
> +
> + return 0;
>
> - return dev_mc_add(rxe->ndev, ll_addr);
> +err_drop:
> + lock_sock(sk);
> + rtnl_lock();
> + ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6);
> + rtnl_unlock();
> + release_sock(sk);
> +err_out:
> + return err;
> }
>
> -/**
> - * rxe_mcast_del - delete multicast address from rxe device
> - * @rxe: rxe device object
> - * @mgid: multicast address as a gid
> - *
> - * Returns 0 on success else an error
> - */
> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
> +static int rxe_mcast_add(struct rxe_mcg *mcg)
> {
> + struct rxe_dev *rxe = mcg->rxe;
> + union ib_gid *mgid = &mcg->mgid;
> unsigned char ll_addr[ETH_ALEN];
> + struct ip_mreqn imr = {};
> + int err;
> +
> + if (mcg->is_ipv6)
> + return rxe_mcast_add6(rxe, mgid);
> +
> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
> + imr.imr_ifindex = rxe->ndev->ifindex;
> + rtnl_lock();
> + err = ip_mc_join_group(recv_sockets.sk4->sk, &imr);
> + rtnl_unlock();
Hi, David Ahern
About the functions ip_mc_join_group, ipv6_sock_mc_join,
ipv6_sock_mc_drop and ip_mc_leave_group:
could you share your advice on how they should be called?
In the following code from net/ipv4/devinet.c, lock_sock() is held around these calls. Could you help us check whether the locking here is correct?
./net/ipv4/devinet.c-634- lock_sock(sk);
./net/ipv4/devinet.c-635- if (join)
./net/ipv4/devinet.c:636: ret = ip_mc_join_group(sk, &mreq);
./net/ipv4/devinet.c-637- else
./net/ipv4/devinet.c-638- ret = ip_mc_leave_group(sk, &mreq);
./net/ipv4/devinet.c-639- release_sock(sk);
Thanks a lot.
Zhu Yanjun
> + if (err && err != -EADDRINUSE)
> + goto err_out;
> +
> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
> + err = dev_mc_add(rxe->ndev, ll_addr);
> + if (err)
> + goto err_leave;
> +
> + return 0;
> +
> +err_leave:
> + rtnl_lock();
> + ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
> + rtnl_unlock();
> +err_out:
> + return err;
> +}
> +
> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid)
> +{
> + struct sock *sk = recv_sockets.sk6->sk;
> + unsigned char ll_addr[ETH_ALEN];
> + int err, err2;
>
> ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
> + err = dev_mc_del(rxe->ndev, ll_addr);
> +
> + lock_sock(sk);
> + rtnl_lock();
> + err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex,
> + (struct in6_addr *)mgid);
> + rtnl_unlock();
> + release_sock(sk);
> +
> + return err ?: err2;
> +}
> +
> +static int rxe_mcast_del(struct rxe_mcg *mcg)
> +{
> + struct rxe_dev *rxe = mcg->rxe;
> + union ib_gid *mgid = &mcg->mgid;
> + unsigned char ll_addr[ETH_ALEN];
> + struct ip_mreqn imr = {};
> + int err, err2;
> +
> + if (mcg->is_ipv6)
> + return rxe_mcast_del6(rxe, mgid);
> +
> + imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
> + imr.imr_ifindex = rxe->ndev->ifindex;
> + ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
> + err = dev_mc_del(rxe->ndev, ll_addr);
> +
> + rtnl_lock();
> + err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
> + rtnl_unlock();
>
> - return dev_mc_del(rxe->ndev, ll_addr);
> + return err ?: err2;
> }
>
> /**
> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
> {
> kref_init(&mcg->ref_cnt);
> memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
> + mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid);
> INIT_LIST_HEAD(&mcg->qp_list);
> mcg->rxe = rxe;
>
> @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> spin_unlock_bh(&rxe->mcg_lock);
>
> /* add mcast address outside of lock */
> - err = rxe_mcast_add(rxe, mgid);
> + err = rxe_mcast_add(mcg);
> if (!err)
> return mcg;
>
> @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
> static void rxe_destroy_mcg(struct rxe_mcg *mcg)
> {
> /* delete mcast address outside of lock */
> - rxe_mcast_del(mcg->rxe, &mcg->mgid);
> + rxe_mcast_del(mcg);
>
> spin_lock_bh(&mcg->rxe->mcg_lock);
> __rxe_destroy_mcg(mcg);
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index 58c3f3759bf0..b481f8da2002 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -18,7 +18,7 @@
> #include "rxe_net.h"
> #include "rxe_loc.h"
>
> -static struct rxe_recv_sockets recv_sockets;
> +struct rxe_recv_sockets recv_sockets;
>
> static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
> struct net_device *ndev,
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
> index 45d80d00f86b..89cee7d5340f 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.h
> +++ b/drivers/infiniband/sw/rxe/rxe_net.h
> @@ -15,6 +15,7 @@ struct rxe_recv_sockets {
> struct socket *sk4;
> struct socket *sk6;
> };
> +extern struct rxe_recv_sockets recv_sockets;
>
> int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> index ccb9d19ffe8a..7be9e6232dd9 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -352,6 +352,7 @@ struct rxe_mcg {
> atomic_t qp_num;
> u32 qkey;
> u16 pkey;
> + bool is_ipv6;
> };
>
> struct rxe_mca {
Powered by blists - more mailing lists