lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <319ba5c7-1ce8-4417-86de-0b42e8abad16@gmail.com>
Date: Thu, 7 Dec 2023 13:07:14 -0600
From: Bob Pearson <rpearsonhpe@...il.com>
To: Rain River <rain.1986.08.12@...il.com>, Zhu Yanjun <yanjun.zhu@...ux.dev>
Cc: jgg@...dia.com, linux-rdma@...r.kernel.org, dsahern@...nel.org,
 davem@...emloft.net, netdev@...r.kernel.org
Subject: Re: [PATCH for-next v5 3/7] RDMA/rxe: Register IP mcast address



On 12/6/23 19:47, Rain River wrote:
> On Tue, Dec 5, 2023 at 6:30 PM Zhu Yanjun <yanjun.zhu@...ux.dev> wrote:
>>
>>
>> 在 2023/12/5 13:55, Zhu Yanjun 写道:
>>> Adding David S. Miller and David Ahern.
>>>
>>> They are the netdev maintainers and are very familiar with mcast.
>>>
>>> Zhu Yanjun
>>>
>>> 在 2023/12/5 8:26, Bob Pearson 写道:
>>>> Currently the rdma_rxe driver does not receive mcast packets at all.
>>>>
>>>> Add code to rxe_mcast_add() and rxe_mcast_del() to register/deregister
>>>> the IP mcast address. This is required for mcast traffic to reach the
>>>> rxe driver when coming from an external source.
>>>>
>>>> Fixes: 8700e3e7c485 ("Soft RoCE driver")
>>>> Signed-off-by: Bob Pearson <rpearsonhpe@...il.com>
>>>> ---
>>>>    drivers/infiniband/sw/rxe/rxe_mcast.c | 119 +++++++++++++++++++++-----
>>>>    drivers/infiniband/sw/rxe/rxe_net.c   |   2 +-
>>>>    drivers/infiniband/sw/rxe/rxe_net.h   |   1 +
>>>>    drivers/infiniband/sw/rxe/rxe_verbs.h |   1 +
>>>>    4 files changed, 102 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> b/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> index 86cc2e18a7fd..54735d07cee5 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> @@ -19,38 +19,116 @@
>>>>     * mcast packets in the rxe receive path.
>>>>     */
>>>>    +#include <linux/igmp.h>
>>>> +
>>>>    #include "rxe.h"
>>>>    -/**
>>>> - * rxe_mcast_add - add multicast address to rxe device
>>>> - * @rxe: rxe device object
>>>> - * @mgid: multicast address as a gid
>>>> - *
>>>> - * Returns 0 on success else an error
>>>> - */
>>>> -static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
>>>> +static int rxe_mcast_add6(struct rxe_dev *rxe, union ib_gid *mgid)
>>>>    {
>>>> +    struct in6_addr *addr6 = (struct in6_addr *)mgid;
>>>> +    struct sock *sk = recv_sockets.sk6->sk;
>>>>        unsigned char ll_addr[ETH_ALEN];
>>>> +    int err;
>>>> +
>>>> +    spin_lock_bh(&sk->sk_lock.slock);
>>>> +    rtnl_lock();
>>>> +    err = ipv6_sock_mc_join(sk, rxe->ndev->ifindex, addr6);
>>
>>
>> Normally sk_lock is used. Not sure if spin_lock_bh is correct or not.
> 
> ./net/ipv6/addrconf.c-2915-     lock_sock(sk);
> ./net/ipv6/addrconf.c-2916-     if (join)
> ./net/ipv6/addrconf.c:2917:             ret = ipv6_sock_mc_join(sk,
> ifindex, addr);
> ./net/ipv6/addrconf.c-2918-     else
> ./net/ipv6/addrconf.c-2919-             ret = ipv6_sock_mc_drop(sk,
> ifindex, addr);
> ./net/ipv6/addrconf.c-2920-     release_sock(sk);
> 
> Should this be lock_sock() instead?

lock_sock() works as well as spin_lock_bh() in preventing the RCU splat and
looks like the preferred way. I'll make this change.

Bob
> 
>>
>> Jason or experts from netdev, please comment on this.
>>
>> Thanks,
>>
>> Zhu Yanjun
>>
>>
>>>> +    rtnl_unlock();
>>>> +    spin_unlock_bh(&sk->sk_lock.slock);
>>>> +    if (err && err != -EADDRINUSE)
>>>> +        goto err_out;
>>>>          ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
>>>> +    err = dev_mc_add(rxe->ndev, ll_addr);
>>>> +    if (err)
>>>> +        goto err_drop;
>>>> +
>>>> +    return 0;
>>>>    -    return dev_mc_add(rxe->ndev, ll_addr);
>>>> +err_drop:
>>>> +    spin_lock_bh(&sk->sk_lock.slock);
>>>> +    rtnl_lock();
>>>> +    ipv6_sock_mc_drop(sk, rxe->ndev->ifindex, addr6);
>>>> +    rtnl_unlock();
>>>> +    spin_unlock_bh(&sk->sk_lock.slock);
>>>> +err_out:
>>>> +    return err;
>>>>    }
>>>>    -/**
>>>> - * rxe_mcast_del - delete multicast address from rxe device
>>>> - * @rxe: rxe device object
>>>> - * @mgid: multicast address as a gid
>>>> - *
>>>> - * Returns 0 on success else an error
>>>> - */
>>>> -static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
>>>> +static int rxe_mcast_add(struct rxe_mcg *mcg)
>>>>    {
>>>> +    struct rxe_dev *rxe = mcg->rxe;
>>>> +    union ib_gid *mgid = &mcg->mgid;
>>>>        unsigned char ll_addr[ETH_ALEN];
>>>> +    struct ip_mreqn imr = {};
>>>> +    int err;
>>>> +
>>>> +    if (mcg->is_ipv6)
>>>> +        return rxe_mcast_add6(rxe, mgid);
>>>> +
>>>> +    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
>>>> +    imr.imr_ifindex = rxe->ndev->ifindex;
>>>> +    rtnl_lock();
>>>> +    err = ip_mc_join_group(recv_sockets.sk4->sk, &imr);
>>>> +    rtnl_unlock();
>>>> +    if (err && err != -EADDRINUSE)
>>>> +        goto err_out;
>>>> +
>>>> +    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
>>>> +    err = dev_mc_add(rxe->ndev, ll_addr);
>>>> +    if (err)
>>>> +        goto err_leave;
>>>> +
>>>> +    return 0;
>>>> +
>>>> +err_leave:
>>>> +    rtnl_lock();
>>>> +    ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
>>>> +    rtnl_unlock();
>>>> +err_out:
>>>> +    return err;
>>>> +}
>>>> +
>>>> +static int rxe_mcast_del6(struct rxe_dev *rxe, union ib_gid *mgid)
>>>> +{
>>>> +    struct sock *sk = recv_sockets.sk6->sk;
>>>> +    unsigned char ll_addr[ETH_ALEN];
>>>> +    int err, err2;
>>>>          ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr);
>>>> +    err = dev_mc_del(rxe->ndev, ll_addr);
>>>> +
>>>> +    spin_lock_bh(&sk->sk_lock.slock);
>>>> +    rtnl_lock();
>>>> +    err2 = ipv6_sock_mc_drop(sk, rxe->ndev->ifindex,
>>>> +            (struct in6_addr *)mgid);
>>>> +    rtnl_unlock();
>>>> +    spin_unlock_bh(&sk->sk_lock.slock);
>>>> +
>>>> +    return err ?: err2;
>>>> +}
>>>> +
>>>> +static int rxe_mcast_del(struct rxe_mcg *mcg)
>>>> +{
>>>> +    struct rxe_dev *rxe = mcg->rxe;
>>>> +    union ib_gid *mgid = &mcg->mgid;
>>>> +    unsigned char ll_addr[ETH_ALEN];
>>>> +    struct ip_mreqn imr = {};
>>>> +    int err, err2;
>>>> +
>>>> +    if (mcg->is_ipv6)
>>>> +        return rxe_mcast_del6(rxe, mgid);
>>>> +
>>>> +    imr.imr_multiaddr = *(struct in_addr *)(mgid->raw + 12);
>>>> +    imr.imr_ifindex = rxe->ndev->ifindex;
>>>> +    ip_eth_mc_map(imr.imr_multiaddr.s_addr, ll_addr);
>>>> +    err = dev_mc_del(rxe->ndev, ll_addr);
>>>> +
>>>> +    rtnl_lock();
>>>> +    err2 = ip_mc_leave_group(recv_sockets.sk4->sk, &imr);
>>>> +    rtnl_unlock();
>>>>    -    return dev_mc_del(rxe->ndev, ll_addr);
>>>> +    return err ?: err2;
>>>>    }
>>>>      /**
>>>> @@ -164,6 +242,7 @@ static void __rxe_init_mcg(struct rxe_dev *rxe,
>>>> union ib_gid *mgid,
>>>>    {
>>>>        kref_init(&mcg->ref_cnt);
>>>>        memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
>>>> +    mcg->is_ipv6 = !ipv6_addr_v4mapped((struct in6_addr *)mgid);
>>>>        INIT_LIST_HEAD(&mcg->qp_list);
>>>>        mcg->rxe = rxe;
>>>>    @@ -225,7 +304,7 @@ static struct rxe_mcg *rxe_get_mcg(struct
>>>> rxe_dev *rxe, union ib_gid *mgid)
>>>>        spin_unlock_bh(&rxe->mcg_lock);
>>>>          /* add mcast address outside of lock */
>>>> -    err = rxe_mcast_add(rxe, mgid);
>>>> +    err = rxe_mcast_add(mcg);
>>>>        if (!err)
>>>>            return mcg;
>>>>    @@ -273,7 +352,7 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
>>>>    static void rxe_destroy_mcg(struct rxe_mcg *mcg)
>>>>    {
>>>>        /* delete mcast address outside of lock */
>>>> -    rxe_mcast_del(mcg->rxe, &mcg->mgid);
>>>> +    rxe_mcast_del(mcg);
>>>>          spin_lock_bh(&mcg->rxe->mcg_lock);
>>>>        __rxe_destroy_mcg(mcg);
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c
>>>> b/drivers/infiniband/sw/rxe/rxe_net.c
>>>> index 58c3f3759bf0..b481f8da2002 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_net.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
>>>> @@ -18,7 +18,7 @@
>>>>    #include "rxe_net.h"
>>>>    #include "rxe_loc.h"
>>>>    -static struct rxe_recv_sockets recv_sockets;
>>>> +struct rxe_recv_sockets recv_sockets;
>>>>      static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
>>>>                         struct net_device *ndev,
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.h
>>>> b/drivers/infiniband/sw/rxe/rxe_net.h
>>>> index 45d80d00f86b..89cee7d5340f 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_net.h
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.h
>>>> @@ -15,6 +15,7 @@ struct rxe_recv_sockets {
>>>>        struct socket *sk4;
>>>>        struct socket *sk6;
>>>>    };
>>>> +extern struct rxe_recv_sockets recv_sockets;
>>>>      int rxe_net_add(const char *ibdev_name, struct net_device *ndev);
>>>>    diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h
>>>> b/drivers/infiniband/sw/rxe/rxe_verbs.h
>>>> index ccb9d19ffe8a..7be9e6232dd9 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
>>>> @@ -352,6 +352,7 @@ struct rxe_mcg {
>>>>        atomic_t        qp_num;
>>>>        u32            qkey;
>>>>        u16            pkey;
>>>> +    bool            is_ipv6;
>>>>    };
>>>>      struct rxe_mca {
>>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ