[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID:
<DS3PR21MB5735AB4B4D1629B6D375EF72CEC9A@DS3PR21MB5735.namprd21.prod.outlook.com>
Date: Mon, 17 Nov 2025 20:39:48 +0000
From: Long Li <longli@...rosoft.com>
To: Haiyang Zhang <haiyangz@...rosoft.com>, "longli@...ux.microsoft.com"
<longli@...ux.microsoft.com>, KY Srinivasan <kys@...rosoft.com>, Wei Liu
<wei.liu@...nel.org>, Dexuan Cui <DECUI@...rosoft.com>, "David S. Miller"
<davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski
<kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Shradha Gupta
<shradhagupta@...ux.microsoft.com>, Simon Horman <horms@...nel.org>,
Konstantin Taranov <kotaranov@...rosoft.com>, Souradeep Chakrabarti
<schakrabarti@...ux.microsoft.com>, Erick Archer <erick.archer@...look.com>,
"linux-hyperv@...r.kernel.org" <linux-hyperv@...r.kernel.org>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"linux-rdma@...r.kernel.org" <linux-rdma@...r.kernel.org>
Subject: RE: [patch net-next] net: mana: Handle hardware reset events when
probing the device
> Subject: RE: [patch net-next] net: mana: Handle hardware reset events when
> probing the device
>
>
>
> > -----Original Message-----
> > From: longli@...ux.microsoft.com <longli@...ux.microsoft.com>
> > Sent: Friday, November 14, 2025 9:29 PM
> > To: KY Srinivasan <kys@...rosoft.com>; Haiyang Zhang
> > <haiyangz@...rosoft.com>; Wei Liu <wei.liu@...nel.org>; Dexuan Cui
> > <DECUI@...rosoft.com>; David S. Miller <davem@...emloft.net>; Eric
> > Dumazet <edumazet@...gle.com>; Jakub Kicinski <kuba@...nel.org>;
> Paolo
> > Abeni <pabeni@...hat.com>; Shradha Gupta
> > <shradhagupta@...ux.microsoft.com>;
> > Simon Horman <horms@...nel.org>; Konstantin Taranov
> > <kotaranov@...rosoft.com>; Souradeep Chakrabarti
> > <schakrabarti@...ux.microsoft.com>; Erick Archer
> > <erick.archer@...look.com>; linux-hyperv@...r.kernel.org;
> > netdev@...r.kernel.org; linux-kernel@...r.kernel.org; linux-
> > rdma@...r.kernel.org
> > Cc: Long Li <longli@...rosoft.com>
> > Subject: [patch net-next] net: mana: Handle hardware reset events when
> > probing the device
> >
> > From: Long Li <longli@...rosoft.com>
> >
> > When MANA is being probed, it's possible that hardware is in recovery
> > mode and the device may get GDMA_EQE_HWC_RESET_REQUEST over HWC
> in the
> > middle of the probe. Detect such a condition and go through the recovery
> > service procedure.
> >
> > Fixes: fbe346ce9d62 ("net: mana: Handle Reset Request from MANA NIC")
> > Signed-off-by: Long Li <longli@...rosoft.com>
> > ---
> > .../net/ethernet/microsoft/mana/gdma_main.c | 131 +++++++++++++++-
> --
> > include/net/mana/gdma.h | 9 +-
> > 2 files changed, 122 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > index effe0a2f207a..1d9c2beb22b2 100644
> > --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> > @@ -15,6 +15,12 @@
> >
> > struct dentry *mana_debugfs_root;
> >
> > +static struct mana_serv_delayed_work {
> > + struct delayed_work work;
> > + struct pci_dev *pdev;
> > + enum gdma_eqe_type type;
> > +} mns_delayed_wk;
> > +
> > static u32 mana_gd_r32(struct gdma_context *g, u64 offset) {
> > return readl(g->bar0_va + offset);
> > @@ -387,6 +393,25 @@ EXPORT_SYMBOL_NS(mana_gd_ring_cq,
> "NET_MANA");
> >
> > #define MANA_SERVICE_PERIOD 10
> >
> > +static void mana_serv_rescan(struct pci_dev *pdev) {
> > + struct pci_bus *parent;
> > +
> > + pci_lock_rescan_remove();
> > +
> > + parent = pdev->bus;
> > + if (!parent) {
> > + dev_err(&pdev->dev, "MANA service: no parent bus\n");
> > + goto out;
> > + }
> > +
> > + pci_stop_and_remove_bus_device(pdev);
> > + pci_rescan_bus(parent);
> > +
> > +out:
> > + pci_unlock_rescan_remove();
> > +}
> > +
> > static void mana_serv_fpga(struct pci_dev *pdev) {
> > struct pci_bus *bus, *parent;
> > @@ -419,9 +444,12 @@ static void mana_serv_reset(struct pci_dev *pdev)
> > {
> > struct gdma_context *gc = pci_get_drvdata(pdev);
> > struct hw_channel_context *hwc;
> > + int ret;
> >
> > if (!gc) {
> > - dev_err(&pdev->dev, "MANA service: no GC\n");
> > + /* Perform PCI rescan on device if GC is not set up */
> > + dev_err(&pdev->dev, "MANA service: GC not setup,
> > rescanning\n");
> > + mana_serv_rescan(pdev);
> > return;
> > }
> >
> > @@ -440,9 +468,18 @@ static void mana_serv_reset(struct pci_dev *pdev)
> >
> > msleep(MANA_SERVICE_PERIOD * 1000);
> >
> > - mana_gd_resume(pdev);
> > + ret = mana_gd_resume(pdev);
> > + if (ret == -ETIMEDOUT || ret == -EPROTO) {
> > + /* Perform PCI rescan on device if we failed on HWC */
> > + dev_err(&pdev->dev, "MANA service: resume failed,
> > rescanning\n");
> > + mana_serv_rescan(pdev);
> > + goto out;
> > + }
> >
> > - dev_info(&pdev->dev, "MANA reset cycle completed\n");
> > + if (ret)
> > + dev_info(&pdev->dev, "MANA reset cycle failed err %d\n",
> ret);
> > + else
> > + dev_info(&pdev->dev, "MANA reset cycle completed\n");
> >
> > out:
> > gc->in_service = false;
> > @@ -454,18 +491,9 @@ struct mana_serv_work {
> > enum gdma_eqe_type type;
> > };
> >
> > -static void mana_serv_func(struct work_struct *w)
> > +static void mana_do_service(enum gdma_eqe_type type, struct pci_dev
> > *pdev)
> > {
> > - struct mana_serv_work *mns_wk;
> > - struct pci_dev *pdev;
> > -
> > - mns_wk = container_of(w, struct mana_serv_work, serv_work);
> > - pdev = mns_wk->pdev;
> > -
> > - if (!pdev)
> > - goto out;
> > -
> > - switch (mns_wk->type) {
> > + switch (type) {
> > case GDMA_EQE_HWC_FPGA_RECONFIG:
> > mana_serv_fpga(pdev);
> > break;
> > @@ -475,12 +503,36 @@ static void mana_serv_func(struct work_struct
> *w)
> > break;
> >
> > default:
> > - dev_err(&pdev->dev, "MANA service: unknown type %d\n",
> > - mns_wk->type);
> > + dev_err(&pdev->dev, "MANA service: unknown type %d\n",
> type);
> > break;
> > }
> > +}
> > +
> > +static void mana_serv_delayed_func(struct work_struct *w) {
> > + struct mana_serv_delayed_work *dwork;
> > + struct pci_dev *pdev;
> > +
> > + dwork = container_of(w, struct mana_serv_delayed_work,
> work.work);
> > + pdev = dwork->pdev;
> > +
> > + if (pdev)
> > + mana_do_service(dwork->type, pdev);
> > +
> > + pci_dev_put(pdev);
> > +}
> > +
> > +static void mana_serv_func(struct work_struct *w) {
> > + struct mana_serv_work *mns_wk;
> > + struct pci_dev *pdev;
> > +
> > + mns_wk = container_of(w, struct mana_serv_work, serv_work);
> > + pdev = mns_wk->pdev;
> > +
> > + if (pdev)
> > + mana_do_service(mns_wk->type, pdev);
> >
> > -out:
> > pci_dev_put(pdev);
> > kfree(mns_wk);
> > module_put(THIS_MODULE);
> > @@ -541,6 +593,17 @@ static void mana_gd_process_eqe(struct
> gdma_queue
> > *eq)
> > case GDMA_EQE_HWC_RESET_REQUEST:
> > dev_info(gc->dev, "Recv MANA service type:%d\n", type);
> >
> > + if (atomic_inc_return(&gc->in_probe) == 1) {
>
> Since we don't care about how many times it entered probe/service,
> test_and_set_bit() should be sufficient here.
>
> > + /*
> > + * Device is in probe and we received an hardware
> reset
> > + * event, probe() will detect that "in_probe" has
> > + * changed and perform service procedure.
> > + */
> > + dev_info(gc->dev,
> > + "Service is to be processed in probe\n");
> > + break;
> > + }
> > +
> > if (gc->in_service) {
> > dev_info(gc->dev, "Already in service\n");
> > break;
> > @@ -1930,6 +1993,8 @@ static int mana_gd_probe(struct pci_dev *pdev,
> > const struct pci_device_id *ent)
> > gc->mana_pci_debugfs =
> debugfs_create_dir(pci_slot_name(pdev-
> > >slot),
> >
> mana_debugfs_root);
> >
> > + atomic_set(&gc->in_probe, 0);
> > +
> > err = mana_gd_setup(pdev);
> > if (err)
> > goto unmap_bar;
> > @@ -1942,8 +2007,19 @@ static int mana_gd_probe(struct pci_dev *pdev,
> > const struct pci_device_id *ent)
> > if (err)
> > goto cleanup_mana;
> >
> > + /*
> > + * If a hardware reset event has occurred over HWC during probe,
> > + * rollback and perform hardware reset procedure.
> > + */
> > + if (atomic_inc_return(&gc->in_probe) > 1) {
> > + err = -EPROTO;
> > + goto cleanup_mana_rdma;
> > + }
> > +
> > return 0;
> >
> > +cleanup_mana_rdma:
> > + mana_rdma_remove(&gc->mana_ib);
> > cleanup_mana:
> > mana_remove(&gc->mana, false);
> > cleanup_gd:
> > @@ -1967,6 +2043,25 @@ static int mana_gd_probe(struct pci_dev *pdev,
> > const struct pci_device_id *ent)
> > disable_dev:
> > pci_disable_device(pdev);
> > dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err);
> > +
> > + /*
> > + * Hardware could be in recovery mode and the HWC returns
> TIMEDOUT
> > or
> > + * EPROTO from mana_gd_setup(), mana_probe() or
> mana_rdma_probe(),
> > or
> > + * we received a hardware reset event over HWC interrupt. In this
> > case,
> > + * perform the device recovery procedure after
> MANA_SERVICE_PERIOD
> > + * seconds.
> > + */
> > + if (err == -ETIMEDOUT || err == -EPROTO) {
> > + dev_info(&pdev->dev, "Start MANA recovery mode\n");
> > +
> > + mns_delayed_wk.pdev = pci_dev_get(pdev);
> > + mns_delayed_wk.type = GDMA_EQE_HWC_RESET_REQUEST;
> > +
> > + INIT_DELAYED_WORK(&mns_delayed_wk.work,
> > mana_serv_delayed_func);
>
> To avoid potentially calling INIT_DELAYED_WORK multiple times, this should be
> done in mana_driver_init().
>
> > + schedule_delayed_work(&mns_delayed_wk.work,
> > + secs_to_jiffies(MANA_SERVICE_PERIOD));
> > + }
> > +
> > return err;
> > }
> >
> > @@ -2084,6 +2179,8 @@ static int __init mana_driver_init(void)
> >
> > static void __exit mana_driver_exit(void) {
> > + cancel_delayed_work_sync(&mns_delayed_wk.work);
>
> I think we should call disable_delayed_work_sync() to prevent the work from
> being scheduled again after this line.
Thank you. I will send v2 to address all the comments and support multiple PCI devices in BM mode.
Long
Powered by blists - more mailing lists