netdev - Re: [net-next PATCH v3 1/2] octeontx2-af: Add new mbox to support multicast/mirror offload

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <15c047f8121a501d8fbef8713d2d9e1fd4a564ad.camel@redhat.com>
Date: Thu, 23 Nov 2023 11:17:43 +0100
From: Paolo Abeni <pabeni@...hat.com>
To: Suman Ghosh <sumang@...vell.com>, sgoutham@...vell.com,
 gakula@...vell.com,  sbhatta@...vell.com, hkelam@...vell.com,
 davem@...emloft.net, edumazet@...gle.com,  kuba@...nel.org,
 netdev@...r.kernel.org, linux-kernel@...r.kernel.org, 
 lcherian@...vell.com, jerinj@...vell.com, horms@...nel.org, 
 wojciech.drewek@...el.com
Subject: Re: [net-next PATCH v3 1/2] octeontx2-af: Add new mbox to support
 multicast/mirror offload

On Tue, 2023-11-21 at 15:13 +0530, Suman Ghosh wrote:
[...]
> +static int nix_add_mce_list_entry(struct rvu *rvu,
> +				  struct nix_hw *nix_hw,
> +				  struct nix_mcast_grp_elem *elem,
> +				  struct nix_mcast_grp_update_req *req)
> +{
> +	struct mce *tmp_mce[NIX_MCE_ENTRY_MAX];

If I read correctly the above struct is is 256 bytes wide. Do you
really need to put so much data on the stack this deep? 


> +	u32 num_entry = req->num_mce_entry;
> +	struct nix_mce_list *mce_list;
> +	struct mce *mce;
> +	int i;
> +
> +	mce_list = &elem->mcast_mce_list;
> +	for (i = 0; i < num_entry; i++) {
> +		mce = kzalloc(sizeof(*mce), GFP_KERNEL);
> +		if (!mce)
> +			goto free_mce;
> +
> +		mce->pcifunc = req->pcifunc[i];
> +		mce->channel = req->channel[i];
> +		mce->rq_rss_index = req->rq_rss_index[i];
> +		mce->dest_type = req->dest_type[i];
> +		mce->is_active = 1;
> +		hlist_add_head(&mce->node, &mce_list->head);
> +		tmp_mce[i] = mce;
> +		mce_list->count++;
> +	}
> +
> +	mce_list->max += num_entry;
> +
> +	/* Dump the updated list to HW */
> +	if (elem->dir == NIX_MCAST_INGRESS)
> +		return nix_update_ingress_mce_list_hw(rvu, nix_hw, elem);
> +
> +	nix_update_egress_mce_list_hw(rvu, nix_hw, elem);
> +	return 0;
> +
> +free_mce:
> +	while (i) {
> +		hlist_del(&tmp_mce[i-1]->node);
> +		kfree(tmp_mce[i-1]);

Minor nit: checkpatch complains about the above: spaces are needed
around '-'.

> +		mce_list->count--;
> +		i--;
> +	}
> +
> +	return -ENOMEM;
> +}
> +
>  static int nix_update_mce_list_entry(struct nix_mce_list *mce_list,
>  				     u16 pcifunc, bool add)
>  {

[...]
> @@ -3237,13 +3473,30 @@ static int nix_setup_mcast(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr)
>  	int err, size;
>  
>  	size = (rvu_read64(rvu, blkaddr, NIX_AF_CONST3) >> 16) & 0x0F;
> -	size = (1ULL << size);
> +	size = BIT_ULL(size);
> +
> +	/* Allocate bitmap for rx mce entries */
> +	mcast->mce_counter[NIX_MCAST_INGRESS].max = 256UL << MC_TBL_SIZE;
> +	err = rvu_alloc_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
> +	if (err)
> +		return -ENOMEM;
> +
> +	/* Allocate bitmap for tx mce entries */
> +	mcast->mce_counter[NIX_MCAST_EGRESS].max = MC_TX_MAX;
> +	err = rvu_alloc_bitmap(&mcast->mce_counter[NIX_MCAST_EGRESS]);
> +	if (err) {
> +		rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
> +		return -ENOMEM;
> +	}
>  
>  	/* Alloc memory for multicast/mirror replication entries */
>  	err = qmem_alloc(rvu->dev, &mcast->mce_ctx,
> -			 (256UL << MC_TBL_SIZE), size);
> -	if (err)
> +			 mcast->mce_counter[NIX_MCAST_INGRESS].max, size);
> +	if (err) {
> +		rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
> +		rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_EGRESS]);
>  		return -ENOMEM;
> +	}
>  
>  	rvu_write64(rvu, blkaddr, NIX_AF_RX_MCAST_BASE,
>  		    (u64)mcast->mce_ctx->iova);
> @@ -3256,8 +3509,12 @@ static int nix_setup_mcast(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr)
>  	size = rvu_read64(rvu, blkaddr, NIX_AF_MC_MIRROR_CONST) & 0xFFFF;
>  	err = qmem_alloc(rvu->dev, &mcast->mcast_buf,
>  			 (8UL << MC_BUF_CNT), size);
> -	if (err)
> +	if (err) {
> +		rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
> +		rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_EGRESS]);
> +		qmem_free(rvu->dev, mcast->mce_ctx);

AFAICS, the above lines frees struct that was already allocated prior
to this patch. It looks like a fix possibly worthy a separate patch.

[...]
> +static void nix_mcast_update_mce_entry(struct rvu *rvu, u16 pcifunc, u8 is_active)
> +{
> +	struct nix_mcast_grp_elem *elem;
> +	struct nix_mcast_grp *mcast_grp;
> +	struct nix_hw *nix_hw;
> +	int blkaddr;
> +
> +	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
> +	nix_hw = get_nix_hw(rvu->hw, blkaddr);
> +	if (!nix_hw)
> +		return;
> +
> +	mcast_grp = &nix_hw->mcast_grp;
> +
> +	mutex_lock(&mcast_grp->mcast_grp_lock);
> +	list_for_each_entry(elem, &mcast_grp->mcast_grp_head, list) {
> +		struct nix_mce_list *mce_list;
> +		struct mce *mce;
> +
> +		/* Iterate the group elements and disable the element which
> +		 * received the disable request.
> +		 */
> +		mce_list = &elem->mcast_mce_list;
> +		hlist_for_each_entry(mce, &mce_list->head, node) {
> +			if (mce->pcifunc == pcifunc) {
> +				if (is_active)
> +					mce->is_active = 1;
> +				else
> +					mce->is_active = 0;

just:

				mce->is_active = is_active;

> +
> +				break;
> +			}
> +		}
> +
> +		/* Dump the updated list to HW */
> +		if (elem->dir == NIX_MCAST_INGRESS)
> +			nix_update_ingress_mce_list_hw(rvu, nix_hw, elem);
> +		else
> +			nix_update_egress_mce_list_hw(rvu, nix_hw, elem);
> +
> +		/* Update the multicast index in NPC rule */
> +		nix_mcast_update_action(rvu, elem);
> +	}
> +	mutex_unlock(&mcast_grp->mcast_grp_lock);
> +}
> +
>  int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req *req,
>  				     struct msg_rsp *rsp)
>  {

[...]
> @@ -5797,3 +6134,327 @@ int rvu_mbox_handler_nix_bandprof_get_hwinfo(struct rvu *rvu, struct msg_req *re
>  
>  	return 0;
>  }
> +
> +static struct nix_mcast_grp_elem *rvu_nix_mcast_find_grp_elem(struct nix_mcast_grp *mcast_grp,
> +							      u32 mcast_grp_idx)
> +{
> +	struct nix_mcast_grp_elem *iter, *result_iter = NULL;
> +
> +	mutex_lock(&mcast_grp->mcast_grp_lock);
> +	list_for_each_entry(iter, &mcast_grp->mcast_grp_head, list) {
> +		if (iter->mcast_grp_idx == mcast_grp_idx) {
> +			result_iter = iter;
> +			break;
> +		}
> +	}
> +	mutex_unlock(&mcast_grp->mcast_grp_lock);
> +
> +	return result_iter;

What prevents 'result_iter' from being freed at this point? (and
causing a later UaF)

> +}
> +
> +int rvu_nix_mcast_get_mce_index(struct rvu *rvu, u16 pcifunc, u32 mcast_grp_idx)
> +{
> +	struct nix_mcast_grp_elem *elem;
> +	struct nix_mcast_grp *mcast_grp;
> +	struct nix_hw *nix_hw;
> +	int blkaddr;
> +
> +	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
> +	nix_hw = get_nix_hw(rvu->hw, blkaddr);
> +	if (!nix_hw)
> +		return NIX_AF_ERR_INVALID_NIXBLK;
> +
> +	mcast_grp = &nix_hw->mcast_grp;
> +	elem = rvu_nix_mcast_find_grp_elem(mcast_grp, mcast_grp_idx);
> +	if (!elem)
> +		return NIX_AF_ERR_INVALID_MCAST_GRP;
> +
> +	return elem->mce_start_index;
> +}
> +
> +void rvu_nix_mcast_flr_free_entries(struct rvu *rvu, u16 pcifunc)
> +{
> +	struct nix_mcast_grp_destroy_req dreq = { 0 };
> +	struct nix_mcast_grp_update_req ureq = { 0 };
> +	struct nix_mcast_grp_update_rsp ursp = { 0 };
> +	struct nix_mcast_grp_elem *elem, *tmp;
> +	struct nix_mcast_grp *mcast_grp;
> +	struct nix_hw *nix_hw;
> +	int blkaddr;
> +
> +	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
> +	nix_hw = get_nix_hw(rvu->hw, blkaddr);
> +	if (!nix_hw)
> +		return;
> +
> +	mcast_grp = &nix_hw->mcast_grp;
> +
> +	mutex_lock(&mcast_grp->mcast_grp_lock);
> +	list_for_each_entry_safe(elem, tmp, &mcast_grp->mcast_grp_head, list) {
> +		struct nix_mce_list *mce_list;
> +		struct hlist_node *tmp;
> +		struct mce *mce;
> +
> +		/* If the pcifunc which created the multicast/mirror
> +		 * group received an FLR, then delete the entire group.
> +		 */
> +		if (elem->pcifunc == pcifunc) {
> +			mutex_unlock(&mcast_grp->mcast_grp_lock);
> +			/* Delete group */
> +			dreq.hdr.pcifunc = elem->pcifunc;
> +			dreq.mcast_grp_idx = elem->mcast_grp_idx;
> +			rvu_mbox_handler_nix_mcast_grp_destroy(rvu, &dreq, NULL);
> +			mutex_lock(&mcast_grp->mcast_grp_lock);

What prevents 'tmp' from being removed and freed while the
'mcast_grp_lock' is not held? there are other similar chunks later.

I'm under the impression that all the multicast data is touched under
the rtnl lock protection so the above lock does not matter much, but I
really did not validate such impression/guess.

Generally speaking the lock schema here looks complex and hard to
follow: a more descriptive changelog will help.

Cheers,

Paolo