Message-ID: <a28ca07d-d8ab-4bd9-a975-34ea336f97d2@linux.dev>
Date: Tue, 5 Dec 2023 13:57:50 +0800
From: Zhu Yanjun <yanjun.zhu@...ux.dev>
To: Bob Pearson <rpearsonhpe@...il.com>, jgg@...dia.com,
 linux-rdma@...r.kernel.org, dsahern@...nel.org, davem@...emloft.net,
 netdev@...r.kernel.org
Subject: Re: [PATCH for-next v5 6/7] RDMA/rxe: Cleanup mcg lifetime

Add David S. Miller and David Ahern.

They are netdev maintainers and are very familiar with mcast.
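
As I read the diff below, the mcg lifetime now hangs off a single kref. A
condensed sketch of the pairing, for other reviewers (illustration only,
condensed from the hunks below, not a copy of the final code):

static void rxe_cleanup_mcg(struct kref *kref)
{
	struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);

	/* last reference gone: leave the red-black tree, delete the
	 * mcast address and free the object after an RCU grace period
	 */
	__rxe_remove_mcg(mcg);
	rxe_mcast_del(mcg);
	atomic_dec(&mcg->rxe->mcg_num);
	kfree_rcu(mcg, rcu);
}

/* one reference is held for each attached qp, plus a temporary one for
 * every rxe_lookup_mcg()/rxe_alloc_mcg() caller; each of these must be
 * balanced by rxe_put_mcg()
 */
static int rxe_get_mcg(struct rxe_mcg *mcg)
{
	return kref_get_unless_zero(&mcg->ref_cnt);
}

int rxe_put_mcg(struct rxe_mcg *mcg)
{
	return kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
}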

Zhu Yanjun

On 2023/12/5 8:26, Bob Pearson wrote:
> Currently the rdma_rxe driver has two different and not really
> compatible ways of managing the lifetime of an mcast group:
> ref counting the mcg struct and counting the number of attached
> qp's. Each does part of the job of cleaning up an mcg when the
> last qp is detached, and they race with each other in the
> process. This patch removes the use of the qp count for lifetime
> management.
>
> Fix up the mcg reference counting so the ref count drops to zero
> correctly, and move code from rxe_destroy_mcg to rxe_cleanup_mcg
> since rxe_destroy_mcg is no longer needed.
>
> This set of fixes reshuffles the code in rxe_mcast.c, and as a
> result a lot of cleanup has been done as well.
>
> Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c")
> Signed-off-by: Bob Pearson <rpearsonhpe@...il.com>
> ---
>   drivers/infiniband/sw/rxe/rxe_loc.h   |   2 +-
>   drivers/infiniband/sw/rxe/rxe_mcast.c | 170 +++++++-------------------
>   drivers/infiniband/sw/rxe/rxe_recv.c  |   2 +-
>   3 files changed, 46 insertions(+), 128 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
> index 62b2b25903fc..0509ccdaa2f2 100644
> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
> @@ -37,7 +37,7 @@ void rxe_cq_cleanup(struct rxe_pool_elem *elem);
>   struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid);
>   int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
>   int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid);
> -void rxe_cleanup_mcg(struct kref *kref);
> +int rxe_put_mcg(struct rxe_mcg *mcg);
>   
>   /* rxe_mmap.c */
>   struct rxe_mmap_info {
> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
> index ac8da0bc8428..c2a28aed9d34 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> @@ -131,13 +131,31 @@ static int rxe_mcast_del(struct rxe_mcg *mcg)
>   	return err ?: err2;
>   }
>   
> -/**
> - * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree)
> - * @mcg: mcg object with an embedded red-black tree node
> - *
> - * Context: caller must hold a reference to mcg and rxe->mcg_mutex and
> - * is responsible to avoid adding the same mcg twice to the tree.
> - */
> +static void __rxe_remove_mcg(struct rxe_mcg *mcg)
> +{
> +	rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
> +}
> +
> +static void rxe_cleanup_mcg(struct kref *kref)
> +{
> +	struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);
> +
> +	__rxe_remove_mcg(mcg);
> +	rxe_mcast_del(mcg);
> +	atomic_dec(&mcg->rxe->mcg_num);
> +	kfree_rcu(mcg, rcu);
> +}
> +
> +static int rxe_get_mcg(struct rxe_mcg *mcg)
> +{
> +	return kref_get_unless_zero(&mcg->ref_cnt);
> +}
> +
> +int rxe_put_mcg(struct rxe_mcg *mcg)
> +{
> +	return kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
> +}
> +
>   static void __rxe_insert_mcg(struct rxe_mcg *mcg)
>   {
>   	struct rb_root *tree = &mcg->rxe->mcg_tree;
> @@ -166,23 +184,11 @@ static void __rxe_insert_mcg(struct rxe_mcg *mcg)
>   	rb_insert_color(&mcg->node, tree);
>   }
>   
> -/**
> - * __rxe_remove_mcg - remove an mcg from red-black tree holding lock
> - * @mcg: mcast group object with an embedded red-black tree node
> - *
> - * Context: caller must hold a reference to mcg and rxe->mcg_mutex
> - */
> -static void __rxe_remove_mcg(struct rxe_mcg *mcg)
> -{
> -	rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
> -}
> -
>   /*
>    * Lookup mgid in the multicast group red-black tree and try to
>    * get a ref on it. Return mcg on success else NULL.
>    */
> -struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe,
> -					union ib_gid *mgid)
> +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>   {
>   	struct rb_root *tree = &rxe->mcg_tree;
>   	struct rxe_mcg *mcg;
> @@ -204,20 +210,14 @@ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe,
>   		else
>   			break;
>   	}
> -	mcg = (node && kref_get_unless_zero(&mcg->ref_cnt)) ? mcg : NULL;
> +	mcg = (node && rxe_get_mcg(mcg)) ? mcg : NULL;
>   	rcu_read_unlock();
>   
>   	return mcg;
>   }
>   
> -/**
> - * rxe_get_mcg - lookup or allocate a mcg
> - * @rxe: rxe device object
> - * @mgid: multicast IP address as a gid
> - *
> - * Returns: mcg on success else ERR_PTR(error)
> - */
> -static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> +/* find an existing mcg or allocate a new one */
> +static struct rxe_mcg *rxe_alloc_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>   {
>   	struct rxe_mcg *mcg;
>   	int err;
> @@ -228,7 +228,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>   		goto out;	/* nothing to do */
>   
>   	if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
> -		err = -ENOMEM;
> +		err = -EINVAL;
>   		goto err_dec;
>   	}
>   
> @@ -244,19 +244,17 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>   	kref_init(&mcg->ref_cnt);
>   	INIT_LIST_HEAD(&mcg->qp_list);
>   	spin_lock_init(&mcg->lock);
> -	kref_get(&mcg->ref_cnt);
> -	__rxe_insert_mcg(mcg);
>   
>   	err = rxe_mcast_add(mcg);
>   	if (err)
>   		goto err_free;
>   
> +	__rxe_insert_mcg(mcg);
>   out:
>   	mutex_unlock(&rxe->mcg_mutex);
>   	return mcg;
>   
>   err_free:
> -	__rxe_remove_mcg(mcg);
>   	kfree(mcg);
>   err_dec:
>   	atomic_dec(&rxe->mcg_num);
> @@ -264,64 +262,12 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>   	return ERR_PTR(err);
>   }
>   
> -/**
> - * rxe_cleanup_mcg - cleanup mcg for kref_put
> - * @kref: struct kref embnedded in mcg
> - */
> -void rxe_cleanup_mcg(struct kref *kref)
> -{
> -	struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);
> -
> -	kfree_rcu(mcg, rcu);
> -}
> -
> -/**
> - * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_mutex
> - * @mcg: the mcg object
> - *
> - * Context: caller is holding rxe->mcg_mutex
> - * no qp's are attached to mcg
> - */
> -static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
> -{
> -	struct rxe_dev *rxe = mcg->rxe;
> -
> -	/* remove mcg from red-black tree then drop ref */
> -	__rxe_remove_mcg(mcg);
> -	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
> -
> -	atomic_dec(&rxe->mcg_num);
> -}
> -
> -/**
> - * rxe_destroy_mcg - destroy mcg object
> - * @mcg: the mcg object
> - *
> - * Context: no qp's are attached to mcg
> - */
> -static void rxe_destroy_mcg(struct rxe_mcg *mcg)
> -{
> -	/* delete mcast address outside of lock */
> -	rxe_mcast_del(mcg);
> -
> -	mutex_lock(&mcg->rxe->mcg_mutex);
> -	__rxe_destroy_mcg(mcg);
> -	mutex_unlock(&mcg->rxe->mcg_mutex);
> -}
> -
> -/**
> - * rxe_attach_mcg - attach qp to mcg if not already attached
> - * @qp: qp object
> - * @mcg: mcg object
> - *
> - * Returns: 0 on success else an error
> - */
> -static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> +static int rxe_attach_mcg(struct rxe_qp *qp, struct rxe_mcg *mcg)
>   {
>   	struct rxe_dev *rxe = mcg->rxe;
>   	struct rxe_mca *mca;
>   	unsigned long flags;
> -	int err;
> +	int err = 0;
>   
>   	mutex_lock(&rxe->mcg_mutex);
>   	spin_lock_irqsave(&mcg->lock, flags);
> @@ -355,29 +301,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>   	rxe_get(qp);
>   	mca->qp = qp;
>   
> +	rxe_get_mcg(mcg);
> +
>   	spin_lock_irqsave(&mcg->lock, flags);
>   	list_add_tail(&mca->qp_list, &mcg->qp_list);
>   	spin_unlock_irqrestore(&mcg->lock, flags);
> -out:
> -	mutex_unlock(&rxe->mcg_mutex);
> -	return 0;
> +	goto out;
>   
>   err_dec_qp_num:
>   	atomic_dec(&mcg->qp_num);
>   err_dec_attach:
>   	atomic_dec(&rxe->mcg_attach);
> +out:
> +	rxe_put_mcg(mcg);
>   	mutex_unlock(&rxe->mcg_mutex);
>   	return err;
>   }
>   
> -/**
> - * rxe_detach_mcg - detach qp from mcg
> - * @mcg: mcg object
> - * @qp: qp object
> - *
> - * Returns: 0 on success else an error if qp is not attached.
> - */
> -static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> +static int rxe_detach_mcg(struct rxe_qp *qp, struct rxe_mcg *mcg)
>   {
>   	struct rxe_dev *rxe = mcg->rxe;
>   	struct rxe_mca *mca;
> @@ -394,7 +335,6 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>   	}
>   	spin_unlock_irqrestore(&mcg->lock, flags);
>   
> -	/* we didn't find the qp on the list */
>   	err = -EINVAL;
>   	goto err_out;
>   
> @@ -402,23 +342,15 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>   	spin_lock_irqsave(&mcg->lock, flags);
>   	list_del(&mca->qp_list);
>   	spin_unlock_irqrestore(&mcg->lock, flags);
> +	rxe_put_mcg(mcg);
>   
>   	atomic_dec(&mcg->qp_num);
>   	atomic_dec(&mcg->rxe->mcg_attach);
>   	atomic_dec(&mca->qp->mcg_num);
>   	rxe_put(mca->qp);
>   	kfree(mca);
> -
> -	/* if the number of qp's attached to the
> -	 * mcast group falls to zero go ahead and
> -	 * tear it down. This will not free the
> -	 * object since we are still holding a ref
> -	 * from the caller
> -	 */
> -	if (atomic_read(&mcg->qp_num) <= 0)
> -		__rxe_destroy_mcg(mcg);
> -
>   err_out:
> +	rxe_put_mcg(mcg);
>   	mutex_unlock(&rxe->mcg_mutex);
>   	return err;
>   }
> @@ -433,7 +365,6 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>    */
>   int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
>   {
> -	int err;
>   	struct rxe_dev *rxe = to_rdev(ibqp->device);
>   	struct rxe_qp *qp = to_rqp(ibqp);
>   	struct rxe_mcg *mcg;
> @@ -441,20 +372,11 @@ int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
>   	if (rxe->attr.max_mcast_grp == 0)
>   		return -EINVAL;
>   
> -	/* takes a ref on mcg if successful */
> -	mcg = rxe_get_mcg(rxe, mgid);
> +	mcg = rxe_alloc_mcg(rxe, mgid);
>   	if (IS_ERR(mcg))
>   		return PTR_ERR(mcg);
>   
> -	err = rxe_attach_mcg(mcg, qp);
> -
> -	/* if we failed to attach the first qp to mcg tear it down */
> -	if (atomic_read(&mcg->qp_num) == 0)
> -		rxe_destroy_mcg(mcg);
> -
> -	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
> -
> -	return err;
> +	return rxe_attach_mcg(qp, mcg);
>   }
>   
>   /**
> @@ -470,14 +392,10 @@ int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
>   	struct rxe_dev *rxe = to_rdev(ibqp->device);
>   	struct rxe_qp *qp = to_rqp(ibqp);
>   	struct rxe_mcg *mcg;
> -	int err;
>   
>   	mcg = rxe_lookup_mcg(rxe, mgid);
>   	if (!mcg)
>   		return -EINVAL;
>   
> -	err = rxe_detach_mcg(mcg, qp);
> -	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
> -
> -	return err;
> +	return rxe_detach_mcg(qp, mcg);
>   }
> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
> index 6cf0da958864..e3ec3dfc57f4 100644
> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
> @@ -262,7 +262,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>   
>   	spin_unlock_irqrestore(&mcg->lock, flags);
>   
> -	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
> +	rxe_put_mcg(mcg);
>   
>   	if (likely(!skb))
>   		return;
