[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID:
<PH7PR21MB32601A9B8D75C164AF9B18C2CA962@PH7PR21MB3260.namprd21.prod.outlook.com>
Date: Thu, 29 Aug 2024 16:55:06 +0000
From: Haiyang Zhang <haiyangz@...rosoft.com>
To: Souradeep Chakrabarti <schakrabarti@...ux.microsoft.com>, KY Srinivasan
<kys@...rosoft.com>, "wei.liu@...nel.org" <wei.liu@...nel.org>, Dexuan Cui
<decui@...rosoft.com>, "davem@...emloft.net" <davem@...emloft.net>, Long Li
<longli@...rosoft.com>, "ssengar@...ux.microsoft.com"
<ssengar@...ux.microsoft.com>, "linux-hyperv@...r.kernel.org"
<linux-hyperv@...r.kernel.org>, "netdev@...r.kernel.org"
<netdev@...r.kernel.org>, "linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>, "linux-rdma@...r.kernel.org"
<linux-rdma@...r.kernel.org>
CC: Souradeep Chakrabarti <schakrabarti@...rosoft.com>,
"stable@...r.kernel.org" <stable@...r.kernel.org>
Subject: RE: [PATCH V3 net] net: mana: Fix error handling in
mana_create_txq/rxq's NAPI cleanup
> -----Original Message-----
> From: Souradeep Chakrabarti <schakrabarti@...ux.microsoft.com>
> Sent: Thursday, August 29, 2024 4:37 AM
> To: KY Srinivasan <kys@...rosoft.com>; Haiyang Zhang
> <haiyangz@...rosoft.com>; wei.liu@...nel.org; Dexuan Cui
> <decui@...rosoft.com>; davem@...emloft.net; Long Li
> <longli@...rosoft.com>; ssengar@...ux.microsoft.com; linux-
> hyperv@...r.kernel.org; netdev@...r.kernel.org; linux-
> kernel@...r.kernel.org; linux-rdma@...r.kernel.org
> Cc: Souradeep Chakrabarti <schakrabarti@...rosoft.com>; Souradeep
> Chakrabarti <schakrabarti@...ux.microsoft.com>; stable@...r.kernel.org
> Subject: [PATCH V3 net] net: mana: Fix error handling in
> mana_create_txq/rxq's NAPI cleanup
>
> Currently napi_disable() gets called during rxq and txq cleanup,
> even before napi is enabled and hrtimer is initialized. It causes
> kernel panic.
>
> ? page_fault_oops+0x136/0x2b0
> ? page_counter_cancel+0x2e/0x80
> ? do_user_addr_fault+0x2f2/0x640
> ? refill_obj_stock+0xc4/0x110
> ? exc_page_fault+0x71/0x160
> ? asm_exc_page_fault+0x27/0x30
> ? __mmdrop+0x10/0x180
> ? __mmdrop+0xec/0x180
> ? hrtimer_active+0xd/0x50
> hrtimer_try_to_cancel+0x2c/0xf0
> hrtimer_cancel+0x15/0x30
> napi_disable+0x65/0x90
> mana_destroy_rxq+0x4c/0x2f0
> mana_create_rxq.isra.0+0x56c/0x6d0
> ? mana_uncfg_vport+0x50/0x50
> mana_alloc_queues+0x21b/0x320
> ? skb_dequeue+0x5f/0x80
>
> Cc: stable@...r.kernel.org
> Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
> Signed-off-by: Souradeep Chakrabarti <schakrabarti@...ux.microsoft.com>
> Reviewed-by: Haiyang Zhang <haiyangz@...rosoft.com>
> Reviewed-by: Shradha Gupta <shradhagupta@...ux.microsoft.com>
> ---
> V3 -> V2:
> Instead of using napi internal attribute, using an atomic
> attribute to verify napi is initialized for a particular txq / rxq.
>
> V2 -> V1:
> Addressed the comment on cleaning up napi for the queues,
> where queue creation was successful.
> ---
> drivers/net/ethernet/microsoft/mana/mana_en.c | 30 ++++++++++++-------
> include/net/mana/mana.h | 4 +++
> 2 files changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c
> b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 39f56973746d..bd303c89cfa6 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1872,10 +1872,12 @@ static void mana_destroy_txq(struct
> mana_port_context *apc)
>
> for (i = 0; i < apc->num_queues; i++) {
> napi = &apc->tx_qp[i].tx_cq.napi;
> - napi_synchronize(napi);
> - napi_disable(napi);
> - netif_napi_del(napi);
> -
> + if (atomic_read(&apc->tx_qp[i].txq.napi_initialized)) {
> + napi_synchronize(napi);
> + napi_disable(napi);
> + netif_napi_del(napi);
> + atomic_set(&apc->tx_qp[i].txq.napi_initialized, 0);
> + }
> mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
>
> mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
> @@ -1931,6 +1933,7 @@ static int mana_create_txq(struct mana_port_context
> *apc,
> txq->ndev = net;
> txq->net_txq = netdev_get_tx_queue(net, i);
> txq->vp_offset = apc->tx_vp_offset;
> + atomic_set(&txq->napi_initialized, 0);
> skb_queue_head_init(&txq->pending_skbs);
>
> memset(&spec, 0, sizeof(spec));
> @@ -1997,6 +2000,7 @@ static int mana_create_txq(struct mana_port_context
> *apc,
>
> netif_napi_add_tx(net, &cq->napi, mana_poll);
> napi_enable(&cq->napi);
> + atomic_set(&txq->napi_initialized, 1);
>
> mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
> }
> @@ -2023,14 +2027,18 @@ static void mana_destroy_rxq(struct
> mana_port_context *apc,
>
> napi = &rxq->rx_cq.napi;
>
> - if (validate_state)
> - napi_synchronize(napi);
> + if (atomic_read(&rxq->napi_initialized)) {
>
> - napi_disable(napi);
> + if (validate_state)
> + napi_synchronize(napi);
>
> - xdp_rxq_info_unreg(&rxq->xdp_rxq);
> + napi_disable(napi);
>
> - netif_napi_del(napi);
> + netif_napi_del(napi);
> + atomic_set(&rxq->napi_initialized, 0);
> + }
> +
> + xdp_rxq_info_unreg(&rxq->xdp_rxq);
>
> mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
>
> @@ -2199,6 +2207,7 @@ static struct mana_rxq *mana_create_rxq(struct
> mana_port_context *apc,
> rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
> rxq->rxq_idx = rxq_idx;
> rxq->rxobj = INVALID_MANA_HANDLE;
> + atomic_set(&rxq->napi_initialized, 0);
>
> mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
> &rxq->headroom);
> @@ -2286,6 +2295,8 @@ static struct mana_rxq *mana_create_rxq(struct
> mana_port_context *apc,
>
> napi_enable(&cq->napi);
>
> + atomic_set(&rxq->napi_initialized, 1);
> +
> mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
> out:
> if (!err)
> @@ -2336,7 +2347,6 @@ static void mana_destroy_vport(struct
> mana_port_context *apc)
> rxq = apc->rxqs[rxq_idx];
> if (!rxq)
> continue;
> -
> mana_destroy_rxq(apc, rxq, true);
Also, this line removal is not necessary.
Powered by blists - more mailing lists