[<prev] [next>] [day] [month] [year] [list]
Message-ID: <a28ca07d-d8ab-4bd9-a975-34ea336f97d2@linux.dev>
Date: Tue, 5 Dec 2023 13:57:50 +0800
From: Zhu Yanjun <yanjun.zhu@...ux.dev>
To: Bob Pearson <rpearsonhpe@...il.com>, jgg@...dia.com,
linux-rdma@...r.kernel.org, dsahern@...nel.org, davem@...emloft.net,
netdev@...r.kernel.org
Subject: Re: [PATCH for-next v5 6/7] RDMA/rxe: Cleanup mcg lifetime
Add David S. Miller and David Ahern.
They are the maintainers in netdev and very familiar with mcast.
Zhu Yanjun
在 2023/12/5 8:26, Bob Pearson 写道:
> Currently the rdma_rxe driver has two different and not really
> compatible ways of managing the lifetime of an mcast group,
> by ref counting the mcg struct and counting the number of
> attached qp's. They are each doing part of the job of cleaning
> up an mcg when the last qp is detached and are racy in the
> process. This patch removes using the use of the number of
> qp's.
>
> Fix up mcg reference counting so the ref count will drop
> to zero correctly and move code from rxe_destroy_mcg to
> rxe_cleanup_mcg since rxe_destroy is no longer needed.
>
> This set of fixes scrambles the code in rxe_mast.c and as
> a result a lot of cleanup has been done as well.
>
> Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c")
> Signed-off-by: Bob Pearson <rpearsonhpe@...il.com>
> ---
> drivers/infiniband/sw/rxe/rxe_loc.h | 2 +-
> drivers/infiniband/sw/rxe/rxe_mcast.c | 170 +++++++-------------------
> drivers/infiniband/sw/rxe/rxe_recv.c | 2 +-
> 3 files changed, 46 insertions(+), 128 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
> index 62b2b25903fc..0509ccdaa2f2 100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -37,7 +37,7 @@ void rxe_cq_cleanup(struct rxe_pool_elem *elem);
> struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid);
> int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
> int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
> -void rxe_cleanup_mcg(struct kref *kref);
> +int rxe_put_mcg(struct rxe_mcg *mcg);
>
> /* rxe_mmap.c */
> struct rxe_mmap_info {
> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
> index ac8da0bc8428..c2a28aed9d34 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> @@ -131,13 +131,31 @@ static int rxe_mcast_del(struct rxe_mcg *mcg)
> return err ?: err2;
> }
>
> -/**
> - * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree)
> - * @mcg: mcg object with an embedded red-black tree node
> - *
> - * Context: caller must hold a reference to mcg and rxe->mcg_mutex and
> - * is responsible to avoid adding the same mcg twice to the tree.
> - */
> +static void __rxe_remove_mcg(struct rxe_mcg *mcg)
> +{
> + rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
> +}
> +
> +static void rxe_cleanup_mcg(struct kref *kref)
> +{
> + struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);
> +
> + __rxe_remove_mcg(mcg);
> + rxe_mcast_del(mcg);
> + atomic_dec(&mcg->rxe->mcg_num);
> + kfree_rcu(mcg, rcu);
> +}
> +
> +static int rxe_get_mcg(struct rxe_mcg *mcg)
> +{
> + return kref_get_unless_zero(&mcg->ref_cnt);
> +}
> +
> +int rxe_put_mcg(struct rxe_mcg *mcg)
> +{
> + return kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
> +}
> +
> static void __rxe_insert_mcg(struct rxe_mcg *mcg)
> {
> struct rb_root *tree = &mcg->rxe->mcg_tree;
> @@ -166,23 +184,11 @@ static void __rxe_insert_mcg(struct rxe_mcg *mcg)
> rb_insert_color(&mcg->node, tree);
> }
>
> -/**
> - * __rxe_remove_mcg - remove an mcg from red-black tree holding lock
> - * @mcg: mcast group object with an embedded red-black tree node
> - *
> - * Context: caller must hold a reference to mcg and rxe->mcg_mutex
> - */
> -static void __rxe_remove_mcg(struct rxe_mcg *mcg)
> -{
> - rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
> -}
> -
> /*
> * Lookup mgid in the multicast group red-black tree and try to
> * get a ref on it. Return mcg on success else NULL.
> */
> -struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe,
> - union ib_gid *mgid)
> +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> {
> struct rb_root *tree = &rxe->mcg_tree;
> struct rxe_mcg *mcg;
> @@ -204,20 +210,14 @@ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe,
> else
> break;
> }
> - mcg = (node && kref_get_unless_zero(&mcg->ref_cnt)) ? mcg : NULL;
> + mcg = (node && rxe_get_mcg(mcg)) ? mcg : NULL;
> rcu_read_unlock();
>
> return mcg;
> }
>
> -/**
> - * rxe_get_mcg - lookup or allocate a mcg
> - * @rxe: rxe device object
> - * @mgid: multicast IP address as a gid
> - *
> - * Returns: mcg on success else ERR_PTR(error)
> - */
> -static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> +/* find an existing mcg or allocate a new one */
> +static struct rxe_mcg *rxe_alloc_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> {
> struct rxe_mcg *mcg;
> int err;
> @@ -228,7 +228,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> goto out; /* nothing to do */
>
> if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
> - err = -ENOMEM;
> + err = -EINVAL;
> goto err_dec;
> }
>
> @@ -244,19 +244,17 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> kref_init(&mcg->ref_cnt);
> INIT_LIST_HEAD(&mcg->qp_list);
> spin_lock_init(&mcg->lock);
> - kref_get(&mcg->ref_cnt);
> - __rxe_insert_mcg(mcg);
>
> err = rxe_mcast_add(mcg);
> if (err)
> goto err_free;
>
> + __rxe_insert_mcg(mcg);
> out:
> mutex_unlock(&rxe->mcg_mutex);
> return mcg;
>
> err_free:
> - __rxe_remove_mcg(mcg);
> kfree(mcg);
> err_dec:
> atomic_dec(&rxe->mcg_num);
> @@ -264,64 +262,12 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> return ERR_PTR(err);
> }
>
> -/**
> - * rxe_cleanup_mcg - cleanup mcg for kref_put
> - * @kref: struct kref embnedded in mcg
> - */
> -void rxe_cleanup_mcg(struct kref *kref)
> -{
> - struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);
> -
> - kfree_rcu(mcg, rcu);
> -}
> -
> -/**
> - * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_mutex
> - * @mcg: the mcg object
> - *
> - * Context: caller is holding rxe->mcg_mutex
> - * no qp's are attached to mcg
> - */
> -static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
> -{
> - struct rxe_dev *rxe = mcg->rxe;
> -
> - /* remove mcg from red-black tree then drop ref */
> - __rxe_remove_mcg(mcg);
> - kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
> -
> - atomic_dec(&rxe->mcg_num);
> -}
> -
> -/**
> - * rxe_destroy_mcg - destroy mcg object
> - * @mcg: the mcg object
> - *
> - * Context: no qp's are attached to mcg
> - */
> -static void rxe_destroy_mcg(struct rxe_mcg *mcg)
> -{
> - /* delete mcast address outside of lock */
> - rxe_mcast_del(mcg);
> -
> - mutex_lock(&mcg->rxe->mcg_mutex);
> - __rxe_destroy_mcg(mcg);
> - mutex_unlock(&mcg->rxe->mcg_mutex);
> -}
> -
> -/**
> - * rxe_attach_mcg - attach qp to mcg if not already attached
> - * @qp: qp object
> - * @mcg: mcg object
> - *
> - * Returns: 0 on success else an error
> - */
> -static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> +static int rxe_attach_mcg(struct rxe_qp *qp, struct rxe_mcg *mcg)
> {
> struct rxe_dev *rxe = mcg->rxe;
> struct rxe_mca *mca;
> unsigned long flags;
> - int err;
> + int err = 0;
>
> mutex_lock(&rxe->mcg_mutex);
> spin_lock_irqsave(&mcg->lock, flags);
> @@ -355,29 +301,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> rxe_get(qp);
> mca->qp = qp;
>
> + rxe_get_mcg(mcg);
> +
> spin_lock_irqsave(&mcg->lock, flags);
> list_add_tail(&mca->qp_list, &mcg->qp_list);
> spin_unlock_irqrestore(&mcg->lock, flags);
> -out:
> - mutex_unlock(&rxe->mcg_mutex);
> - return 0;
> + goto out;
>
> err_dec_qp_num:
> atomic_dec(&mcg->qp_num);
> err_dec_attach:
> atomic_dec(&rxe->mcg_attach);
> +out:
> + rxe_put_mcg(mcg);
> mutex_unlock(&rxe->mcg_mutex);
> return err;
> }
>
> -/**
> - * rxe_detach_mcg - detach qp from mcg
> - * @mcg: mcg object
> - * @qp: qp object
> - *
> - * Returns: 0 on success else an error if qp is not attached.
> - */
> -static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> +static int rxe_detach_mcg(struct rxe_qp *qp, struct rxe_mcg *mcg)
> {
> struct rxe_dev *rxe = mcg->rxe;
> struct rxe_mca *mca;
> @@ -394,7 +335,6 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> }
> spin_unlock_irqrestore(&mcg->lock, flags);
>
> - /* we didn't find the qp on the list */
> err = -EINVAL;
> goto err_out;
>
> @@ -402,23 +342,15 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> spin_lock_irqsave(&mcg->lock, flags);
> list_del(&mca->qp_list);
> spin_unlock_irqrestore(&mcg->lock, flags);
> + rxe_put_mcg(mcg);
>
> atomic_dec(&mcg->qp_num);
> atomic_dec(&mcg->rxe->mcg_attach);
> atomic_dec(&mca->qp->mcg_num);
> rxe_put(mca->qp);
> kfree(mca);
> -
> - /* if the number of qp's attached to the
> - * mcast group falls to zero go ahead and
> - * tear it down. This will not free the
> - * object since we are still holding a ref
> - * from the caller
> - */
> - if (atomic_read(&mcg->qp_num) <= 0)
> - __rxe_destroy_mcg(mcg);
> -
> err_out:
> + rxe_put_mcg(mcg);
> mutex_unlock(&rxe->mcg_mutex);
> return err;
> }
> @@ -433,7 +365,6 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> */
> int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
> {
> - int err;
> struct rxe_dev *rxe = to_rdev(ibqp->device);
> struct rxe_qp *qp = to_rqp(ibqp);
> struct rxe_mcg *mcg;
> @@ -441,20 +372,11 @@ int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
> if (rxe->attr.max_mcast_grp == 0)
> return -EINVAL;
>
> - /* takes a ref on mcg if successful */
> - mcg = rxe_get_mcg(rxe, mgid);
> + mcg = rxe_alloc_mcg(rxe, mgid);
> if (IS_ERR(mcg))
> return PTR_ERR(mcg);
>
> - err = rxe_attach_mcg(mcg, qp);
> -
> - /* if we failed to attach the first qp to mcg tear it down */
> - if (atomic_read(&mcg->qp_num) == 0)
> - rxe_destroy_mcg(mcg);
> -
> - kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
> -
> - return err;
> + return rxe_attach_mcg(qp, mcg);
> }
>
> /**
> @@ -470,14 +392,10 @@ int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
> struct rxe_dev *rxe = to_rdev(ibqp->device);
> struct rxe_qp *qp = to_rqp(ibqp);
> struct rxe_mcg *mcg;
> - int err;
>
> mcg = rxe_lookup_mcg(rxe, mgid);
> if (!mcg)
> return -EINVAL;
>
> - err = rxe_detach_mcg(mcg, qp);
> - kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
> -
> - return err;
> + return rxe_detach_mcg(qp, mcg);
> }
> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
> index 6cf0da958864..e3ec3dfc57f4 100644
> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
> @@ -262,7 +262,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>
> spin_unlock_irqrestore(&mcg->lock, flags);
>
> - kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
> + rxe_put_mcg(mcg);
>
> if (likely(!skb))
> return;
Powered by blists - more mailing lists