linux-kernel - RE: [EXT] Re: [net-next PATCH v3 1/2] octeontx2-af: Add new mbox to support multicast/mirror offload

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <SJ0PR18MB52166B498ED2E7190D803622DBBEA@SJ0PR18MB5216.namprd18.prod.outlook.com>
Date:   Sun, 26 Nov 2023 16:04:19 +0000
From:   Suman Ghosh <sumang@...vell.com>
To:     Paolo Abeni <pabeni@...hat.com>,
        Sunil Kovvuri Goutham <sgoutham@...vell.com>,
        Geethasowjanya Akula <gakula@...vell.com>,
        Subbaraya Sundeep Bhatta <sbhatta@...vell.com>,
        Hariprasad Kelam <hkelam@...vell.com>,
        "davem@...emloft.net" <davem@...emloft.net>,
        "edumazet@...gle.com" <edumazet@...gle.com>,
        "kuba@...nel.org" <kuba@...nel.org>,
        "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        Linu Cherian <lcherian@...vell.com>,
        Jerin Jacob Kollanukkaran <jerinj@...vell.com>,
        "horms@...nel.org" <horms@...nel.org>,
        "wojciech.drewek@...el.com" <wojciech.drewek@...el.com>
Subject: RE: [EXT] Re: [net-next PATCH v3 1/2] octeontx2-af: Add new mbox to
 support multicast/mirror offload

>[...]
>> +static int nix_add_mce_list_entry(struct rvu *rvu,
>> +				  struct nix_hw *nix_hw,
>> +				  struct nix_mcast_grp_elem *elem,
>> +				  struct nix_mcast_grp_update_req *req) {
>> +	struct mce *tmp_mce[NIX_MCE_ENTRY_MAX];
>
>If I read correctly the above struct is is 256 bytes wide. Do you really
>need to put so much data on the stack this deep?
[Suman] Ack, will update the logic in v4.
>
>
>> +	u32 num_entry = req->num_mce_entry;
>> +	struct nix_mce_list *mce_list;
>> +	struct mce *mce;
>> +	int i;
>> +
>> +	mce_list = &elem->mcast_mce_list;
>> +	for (i = 0; i < num_entry; i++) {
>> +		mce = kzalloc(sizeof(*mce), GFP_KERNEL);
>> +		if (!mce)
>> +			goto free_mce;
>> +
>> +		mce->pcifunc = req->pcifunc[i];
>> +		mce->channel = req->channel[i];
>> +		mce->rq_rss_index = req->rq_rss_index[i];
>> +		mce->dest_type = req->dest_type[i];
>> +		mce->is_active = 1;
>> +		hlist_add_head(&mce->node, &mce_list->head);
>> +		tmp_mce[i] = mce;
>> +		mce_list->count++;
>> +	}
>> +
>> +	mce_list->max += num_entry;
>> +
>> +	/* Dump the updated list to HW */
>> +	if (elem->dir == NIX_MCAST_INGRESS)
>> +		return nix_update_ingress_mce_list_hw(rvu, nix_hw, elem);
>> +
>> +	nix_update_egress_mce_list_hw(rvu, nix_hw, elem);
>> +	return 0;
>> +
>> +free_mce:
>> +	while (i) {
>> +		hlist_del(&tmp_mce[i-1]->node);
>> +		kfree(tmp_mce[i-1]);
>
>Minor nit: checkpatch complains about the above: spaces are needed
>around '-'.
[Suman] This code should be updated in v4.
>
>> +		mce_list->count--;
>> +		i--;
>> +	}
>> +
>> +	return -ENOMEM;
>> +}
>> +
>>  static int nix_update_mce_list_entry(struct nix_mce_list *mce_list,
>>  				     u16 pcifunc, bool add)
>>  {
>
>[...]
>> @@ -3237,13 +3473,30 @@ static int nix_setup_mcast(struct rvu *rvu,
>struct nix_hw *nix_hw, int blkaddr)
>>  	int err, size;
>>
>>  	size = (rvu_read64(rvu, blkaddr, NIX_AF_CONST3) >> 16) & 0x0F;
>> -	size = (1ULL << size);
>> +	size = BIT_ULL(size);
>> +
>> +	/* Allocate bitmap for rx mce entries */
>> +	mcast->mce_counter[NIX_MCAST_INGRESS].max = 256UL << MC_TBL_SIZE;
>> +	err = rvu_alloc_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
>> +	if (err)
>> +		return -ENOMEM;
>> +
>> +	/* Allocate bitmap for tx mce entries */
>> +	mcast->mce_counter[NIX_MCAST_EGRESS].max = MC_TX_MAX;
>> +	err = rvu_alloc_bitmap(&mcast->mce_counter[NIX_MCAST_EGRESS]);
>> +	if (err) {
>> +		rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
>> +		return -ENOMEM;
>> +	}
>>
>>  	/* Alloc memory for multicast/mirror replication entries */
>>  	err = qmem_alloc(rvu->dev, &mcast->mce_ctx,
>> -			 (256UL << MC_TBL_SIZE), size);
>> -	if (err)
>> +			 mcast->mce_counter[NIX_MCAST_INGRESS].max, size);
>> +	if (err) {
>> +		rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
>> +		rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_EGRESS]);
>>  		return -ENOMEM;
>> +	}
>>
>>  	rvu_write64(rvu, blkaddr, NIX_AF_RX_MCAST_BASE,
>>  		    (u64)mcast->mce_ctx->iova);
>> @@ -3256,8 +3509,12 @@ static int nix_setup_mcast(struct rvu *rvu,
>struct nix_hw *nix_hw, int blkaddr)
>>  	size = rvu_read64(rvu, blkaddr, NIX_AF_MC_MIRROR_CONST) & 0xFFFF;
>>  	err = qmem_alloc(rvu->dev, &mcast->mcast_buf,
>>  			 (8UL << MC_BUF_CNT), size);
>> -	if (err)
>> +	if (err) {
>> +		rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
>> +		rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_EGRESS]);
>> +		qmem_free(rvu->dev, mcast->mce_ctx);
>
>AFAICS, the above lines frees struct that was already allocated prior to
>this patch. It looks like a fix possibly worthy a separate patch.
[Suman] Okay. I will push a separate patch for qmem_free()
>
>[...]
>> +static void nix_mcast_update_mce_entry(struct rvu *rvu, u16 pcifunc,
>> +u8 is_active) {
>> +	struct nix_mcast_grp_elem *elem;
>> +	struct nix_mcast_grp *mcast_grp;
>> +	struct nix_hw *nix_hw;
>> +	int blkaddr;
>> +
>> +	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
>> +	nix_hw = get_nix_hw(rvu->hw, blkaddr);
>> +	if (!nix_hw)
>> +		return;
>> +
>> +	mcast_grp = &nix_hw->mcast_grp;
>> +
>> +	mutex_lock(&mcast_grp->mcast_grp_lock);
>> +	list_for_each_entry(elem, &mcast_grp->mcast_grp_head, list) {
>> +		struct nix_mce_list *mce_list;
>> +		struct mce *mce;
>> +
>> +		/* Iterate the group elements and disable the element which
>> +		 * received the disable request.
>> +		 */
>> +		mce_list = &elem->mcast_mce_list;
>> +		hlist_for_each_entry(mce, &mce_list->head, node) {
>> +			if (mce->pcifunc == pcifunc) {
>> +				if (is_active)
>> +					mce->is_active = 1;
>> +				else
>> +					mce->is_active = 0;
>
>just:
[Suman] ack, will update in v4
>
>				mce->is_active = is_active;
>
>> +
>> +				break;
>> +			}
>> +		}
>> +
>> +		/* Dump the updated list to HW */
>> +		if (elem->dir == NIX_MCAST_INGRESS)
>> +			nix_update_ingress_mce_list_hw(rvu, nix_hw, elem);
>> +		else
>> +			nix_update_egress_mce_list_hw(rvu, nix_hw, elem);
>> +
>> +		/* Update the multicast index in NPC rule */
>> +		nix_mcast_update_action(rvu, elem);
>> +	}
>> +	mutex_unlock(&mcast_grp->mcast_grp_lock);
>> +}
>> +
>>  int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req
>*req,
>>  				     struct msg_rsp *rsp)
>>  {
>
>[...]
>> @@ -5797,3 +6134,327 @@ int
>> rvu_mbox_handler_nix_bandprof_get_hwinfo(struct rvu *rvu, struct
>> msg_req *re
>>
>>  	return 0;
>>  }
>> +
>> +static struct nix_mcast_grp_elem *rvu_nix_mcast_find_grp_elem(struct
>nix_mcast_grp *mcast_grp,
>> +							      u32 mcast_grp_idx)
>> +{
>> +	struct nix_mcast_grp_elem *iter, *result_iter = NULL;
>> +
>> +	mutex_lock(&mcast_grp->mcast_grp_lock);
>> +	list_for_each_entry(iter, &mcast_grp->mcast_grp_head, list) {
>> +		if (iter->mcast_grp_idx == mcast_grp_idx) {
>> +			result_iter = iter;
>> +			break;
>> +		}
>> +	}
>> +	mutex_unlock(&mcast_grp->mcast_grp_lock);
>> +
>> +	return result_iter;
>
>What prevents 'result_iter' from being freed at this point? (and causing
>a later UaF)
[Suman] I updated this as part of a review comment, but yes, it is not correct. Will revert to older version in v4.
>
>> +}
>> +
>> +int rvu_nix_mcast_get_mce_index(struct rvu *rvu, u16 pcifunc, u32
>> +mcast_grp_idx) {
>> +	struct nix_mcast_grp_elem *elem;
>> +	struct nix_mcast_grp *mcast_grp;
>> +	struct nix_hw *nix_hw;
>> +	int blkaddr;
>> +
>> +	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
>> +	nix_hw = get_nix_hw(rvu->hw, blkaddr);
>> +	if (!nix_hw)
>> +		return NIX_AF_ERR_INVALID_NIXBLK;
>> +
>> +	mcast_grp = &nix_hw->mcast_grp;
>> +	elem = rvu_nix_mcast_find_grp_elem(mcast_grp, mcast_grp_idx);
>> +	if (!elem)
>> +		return NIX_AF_ERR_INVALID_MCAST_GRP;
>> +
>> +	return elem->mce_start_index;
>> +}
>> +
>> +void rvu_nix_mcast_flr_free_entries(struct rvu *rvu, u16 pcifunc) {
>> +	struct nix_mcast_grp_destroy_req dreq = { 0 };
>> +	struct nix_mcast_grp_update_req ureq = { 0 };
>> +	struct nix_mcast_grp_update_rsp ursp = { 0 };
>> +	struct nix_mcast_grp_elem *elem, *tmp;
>> +	struct nix_mcast_grp *mcast_grp;
>> +	struct nix_hw *nix_hw;
>> +	int blkaddr;
>> +
>> +	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
>> +	nix_hw = get_nix_hw(rvu->hw, blkaddr);
>> +	if (!nix_hw)
>> +		return;
>> +
>> +	mcast_grp = &nix_hw->mcast_grp;
>> +
>> +	mutex_lock(&mcast_grp->mcast_grp_lock);
>> +	list_for_each_entry_safe(elem, tmp, &mcast_grp->mcast_grp_head,
>list) {
>> +		struct nix_mce_list *mce_list;
>> +		struct hlist_node *tmp;
>> +		struct mce *mce;
>> +
>> +		/* If the pcifunc which created the multicast/mirror
>> +		 * group received an FLR, then delete the entire group.
>> +		 */
>> +		if (elem->pcifunc == pcifunc) {
>> +			mutex_unlock(&mcast_grp->mcast_grp_lock);
>> +			/* Delete group */
>> +			dreq.hdr.pcifunc = elem->pcifunc;
>> +			dreq.mcast_grp_idx = elem->mcast_grp_idx;
>> +			rvu_mbox_handler_nix_mcast_grp_destroy(rvu, &dreq, NULL);
>> +			mutex_lock(&mcast_grp->mcast_grp_lock);
>
>What prevents 'tmp' from being removed and freed while the
>'mcast_grp_lock' is not held? there are other similar chunks later.
>
>I'm under the impression that all the multicast data is touched under
>the rtnl lock protection so the above lock does not matter much, but I
>really did not validate such impression/guess.
>
>Generally speaking the lock schema here looks complex and hard to
>follow: a more descriptive changelog will help.
[Suman] Got it. I will try to simplify the logic and push a v4.
>
>Cheers,
>
>Paolo