[<prev] [next>] [day] [month] [year] [list]
Message-ID: <39b6a032-e530-4b33-99dc-3d3378504e15@linux.dev>
Date: Tue, 5 Dec 2023 13:56:43 +0800
From: Zhu Yanjun <yanjun.zhu@...ux.dev>
To: Bob Pearson <rpearsonhpe@...il.com>, jgg@...dia.com,
linux-rdma@...r.kernel.org, dsahern@...nel.org, davem@...emloft.net,
netdev@...r.kernel.org
Subject: Re: [PATCH for-next v5 4/7] RDMA/rxe: Let rxe_lookup_mcg use
rcu_read_lock
Adding David S. Miller and David Ahern to this thread.
They are netdev maintainers and are very familiar with multicast (mcast).
Zhu Yanjun
在 2023/12/5 8:26, Bob Pearson 写道:
> Change locking of read side operations of the mcast group
> red-black tree to use rcu read locking. This will allow changing
> the mcast lock in the next patch to be a mutex without
> breaking rxe_recv.c which runs in an atomic state. It is also a
> better implementation than the current use of a spin-lock per
> rdma device since receiving mcast packets will be much more
> common than registering/deregistering mcast groups.
>
> Signed-off-by: Bob Pearson <rpearsonhpe@...il.com>
> ---
> drivers/infiniband/sw/rxe/rxe_mcast.c | 59 +++++++++------------------
> drivers/infiniband/sw/rxe/rxe_verbs.h | 1 +
> 2 files changed, 21 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
> index 54735d07cee5..44948f9cb02b 100644
> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> @@ -151,13 +151,18 @@ static void __rxe_insert_mcg(struct rxe_mcg *mcg)
> tmp = rb_entry(node, struct rxe_mcg, node);
>
> cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid));
> - if (cmp > 0)
> + if (cmp > 0) {
> link = &(*link)->rb_left;
> - else
> + } else if (cmp < 0) {
> link = &(*link)->rb_right;
> + } else {
> + /* we must delete the old mcg before adding one */
> + WARN_ON_ONCE(1);
> + return;
> + }
> }
>
> - rb_link_node(&mcg->node, node, link);
> + rb_link_node_rcu(&mcg->node, node, link);
> rb_insert_color(&mcg->node, tree);
> }
>
> @@ -172,15 +177,11 @@ static void __rxe_remove_mcg(struct rxe_mcg *mcg)
> rb_erase(&mcg->node, &mcg->rxe->mcg_tree);
> }
>
> -/**
> - * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock
> - * @rxe: rxe device object
> - * @mgid: multicast IP address
> - *
> - * Context: caller must hold rxe->mcg_lock
> - * Returns: mcg on success and takes a ref to mcg else NULL
> +/*
> + * Lookup mgid in the multicast group red-black tree and try to
> + * get a ref on it. Return mcg on success else NULL.
> */
> -static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
> +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe,
> union ib_gid *mgid)
> {
> struct rb_root *tree = &rxe->mcg_tree;
> @@ -188,7 +189,8 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
> struct rb_node *node;
> int cmp;
>
> - node = tree->rb_node;
> + rcu_read_lock();
> + node = rcu_dereference_raw(tree->rb_node);
>
> while (node) {
> mcg = rb_entry(node, struct rxe_mcg, node);
> @@ -196,35 +198,14 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
> cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid));
>
> if (cmp > 0)
> - node = node->rb_left;
> + node = rcu_dereference_raw(node->rb_left);
> else if (cmp < 0)
> - node = node->rb_right;
> + node = rcu_dereference_raw(node->rb_right);
> else
> break;
> }
> -
> - if (node) {
> - kref_get(&mcg->ref_cnt);
> - return mcg;
> - }
> -
> - return NULL;
> -}
> -
> -/**
> - * rxe_lookup_mcg - lookup up mcg in red-back tree
> - * @rxe: rxe device object
> - * @mgid: multicast IP address
> - *
> - * Returns: mcg if found else NULL
> - */
> -struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> -{
> - struct rxe_mcg *mcg;
> -
> - spin_lock_bh(&rxe->mcg_lock);
> - mcg = __rxe_lookup_mcg(rxe, mgid);
> - spin_unlock_bh(&rxe->mcg_lock);
> + mcg = (node && kref_get_unless_zero(&mcg->ref_cnt)) ? mcg : NULL;
> + rcu_read_unlock();
>
> return mcg;
> }
> @@ -292,7 +273,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>
> spin_lock_bh(&rxe->mcg_lock);
> /* re-check to see if someone else just added it */
> - tmp = __rxe_lookup_mcg(rxe, mgid);
> + tmp = rxe_lookup_mcg(rxe, mgid);
> if (tmp) {
> spin_unlock_bh(&rxe->mcg_lock);
> atomic_dec(&rxe->mcg_num);
> @@ -322,7 +303,7 @@ void rxe_cleanup_mcg(struct kref *kref)
> {
> struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt);
>
> - kfree(mcg);
> + kfree_rcu(mcg, rcu);
> }
>
> /**
> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> index 7be9e6232dd9..8058e5039322 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -345,6 +345,7 @@ struct rxe_mw {
>
> struct rxe_mcg {
> struct rb_node node;
> + struct rcu_head rcu;
> struct kref ref_cnt;
> struct rxe_dev *rxe;
> struct list_head qp_list;
Powered by blists - more mailing lists