[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <01f27ba0-6239-4195-beda-bc3fea1a30cf@oracle.com>
Date: Mon, 12 May 2025 10:59:43 +0530
From: ALOK TIWARI <alok.a.tiwari@...cle.com>
To: Konstantin Taranov <kotaranov@...ux.microsoft.com>,
kotaranov@...rosoft.com, pabeni@...hat.com, haiyangz@...rosoft.com,
kys@...rosoft.com, edumazet@...gle.com, kuba@...nel.org,
davem@...emloft.net, decui@...rosoft.com, wei.liu@...nel.org,
longli@...rosoft.com, jgg@...pe.ca, leon@...nel.org
Cc: linux-rdma@...r.kernel.org, linux-kernel@...r.kernel.org,
netdev@...r.kernel.org
Subject: Re: [PATCH rdma-next v4 4/4] net: mana: Add support for auxiliary
device servicing events
On 07-05-2025 21:29, Konstantin Taranov wrote:
> From: Shiraz Saleem <shirazsaleem@...rosoft.com>
>
> Handle soc servcing events which require the rdma auxiliary device resources to
typo servcing ->servicing
> be cleaned up during a suspend, and re-initialized during a resume.
>
> Signed-off-by: Shiraz Saleem <shirazsaleem@...rosoft.com>
> Signed-off-by: Konstantin Taranov <kotaranov@...rosoft.com>
> ---
> .../net/ethernet/microsoft/mana/gdma_main.c | 11 ++-
> .../net/ethernet/microsoft/mana/hw_channel.c | 20 ++++++
> drivers/net/ethernet/microsoft/mana/mana_en.c | 69 +++++++++++++++++++
> include/net/mana/gdma.h | 19 +++++
> include/net/mana/hw_channel.h | 9 +++
> 5 files changed, 127 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index 59e7814..3504507 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> @@ -391,6 +391,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
> case GDMA_EQE_HWC_INIT_EQ_ID_DB:
> case GDMA_EQE_HWC_INIT_DATA:
> case GDMA_EQE_HWC_INIT_DONE:
> + case GDMA_EQE_HWC_SOC_SERVICE:
> case GDMA_EQE_RNIC_QP_FATAL:
> if (!eq->eq.callback)
> break;
> @@ -1468,10 +1469,14 @@ static int mana_gd_setup(struct pci_dev *pdev)
> mana_gd_init_registers(pdev);
> mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base);
>
> + gc->service_wq = alloc_ordered_workqueue("gdma_service_wq", 0);
> + if (!gc->service_wq)
> + return -ENOMEM;
> +
> err = mana_gd_setup_irqs(pdev);
> if (err) {
> dev_err(gc->dev, "Failed to setup IRQs: %d\n", err);
> - return err;
> + goto free_workqueue;
> }
>
> err = mana_hwc_create_channel(gc);
> @@ -1497,6 +1502,8 @@ destroy_hwc:
> mana_hwc_destroy_channel(gc);
> remove_irq:
> mana_gd_remove_irqs(pdev);
> +free_workqueue:
> + destroy_workqueue(gc->service_wq);
> dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err);
> return err;
> }
> @@ -1508,6 +1515,8 @@ static void mana_gd_cleanup(struct pci_dev *pdev)
> mana_hwc_destroy_channel(gc);
>
> mana_gd_remove_irqs(pdev);
> +
> + destroy_workqueue(gc->service_wq);
> dev_dbg(&pdev->dev, "mana gdma cleanup successful\n");
> }
>
> diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> index 1ba4960..60f6bc1 100644
> --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
> +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> @@ -112,11 +112,13 @@ out:
> static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
> struct gdma_event *event)
> {
> + union hwc_init_soc_service_type service_data;
> struct hw_channel_context *hwc = ctx;
> struct gdma_dev *gd = hwc->gdma_dev;
> union hwc_init_type_data type_data;
> union hwc_init_eq_id_db eq_db;
> u32 type, val;
> + int ret;
>
> switch (event->type) {
> case GDMA_EQE_HWC_INIT_EQ_ID_DB:
> @@ -199,7 +201,25 @@ static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
> }
>
> break;
> + case GDMA_EQE_HWC_SOC_SERVICE:
> + service_data.as_uint32 = event->details[0];
> + type = service_data.type;
> + val = service_data.value;
what is use of val?
>
> + switch (type) {
> + case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
> + case GDMA_SERVICE_TYPE_RDMA_RESUME:
> + ret = mana_rdma_service_event(gd->gdma_context, type);
> + if (ret)
> + dev_err(hwc->dev, "Failed to schedule adev service event: %d\n",
> + ret);
> + break;
> + default:
> + dev_warn(hwc->dev, "Received unknown SOC service type %u\n", type);
> + break;
> + }
> +
> + break;
> default:
> dev_warn(hwc->dev, "Received unknown gdma event %u\n", event->type);
> /* Ignore unknown events, which should never happen. */
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 2013d0e..39e01e2 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -2992,6 +2992,70 @@ idx_fail:
> return ret;
> }
>
> +static void mana_handle_rdma_servicing(struct work_struct *work)
this name does not sound clearer and more aligned with typical naming
conventions.
Since it is an RDMA service event handler, it could be named to
mana_rdma_service_handle. What are your thoughts on this?"
> +{
> + struct mana_service_work *serv_work =
> + container_of(work, struct mana_service_work, work);
> + struct gdma_dev *gd = serv_work->gdma_dev;
> + struct device *dev = gd->gdma_context->dev;
> + int ret;
> +
> + if (READ_ONCE(gd->rdma_teardown))
> + goto out;
> +
> + switch (serv_work->event) {
> + case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
> + if (!gd->adev || gd->is_suspended)
> + break;
> +
> + remove_adev(gd);
> + gd->is_suspended = true;
> + break;
> +
> + case GDMA_SERVICE_TYPE_RDMA_RESUME:
> + if (!gd->is_suspended)
> + break;
> +
> + ret = add_adev(gd, "rdma");
> + if (ret)
> + dev_err(dev, "Failed to add adev on resume: %d\n", ret);
> + else
> + gd->is_suspended = false;
> + break;
> +
> + default:
> + dev_warn(dev, "unknown adev service event %u\n",
> + serv_work->event);
> + break;
> + }
> +
> +out:
> + kfree(serv_work);
> +}
> +
> +int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event)
> +{
> + struct gdma_dev *gd = &gc->mana_ib;
> + struct mana_service_work *serv_work;
> +
> + if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
> + /* RDMA device is not detected on pci */
> + return 0;
> + }
> +
> + serv_work = kzalloc(sizeof(*serv_work), GFP_ATOMIC);
> + if (!serv_work)
> + return -ENOMEM;
> +
> + serv_work->event = event;
> + serv_work->gdma_dev = gd;
> +
> + INIT_WORK(&serv_work->work, mana_handle_rdma_servicing);
> + queue_work(gc->service_wq, &serv_work->work);
> +
> + return 0;
> +}
> +
> int mana_probe(struct gdma_dev *gd, bool resuming)
> {
> struct gdma_context *gc = gd->gdma_context;
> @@ -3172,11 +3236,16 @@ int mana_rdma_probe(struct gdma_dev *gd)
>
> void mana_rdma_remove(struct gdma_dev *gd)
> {
> + struct gdma_context *gc = gd->gdma_context;
> +
> if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
> /* RDMA device is not detected on pci */
> return;
> }
>
> + WRITE_ONCE(gd->rdma_teardown, true);
> + flush_workqueue(gc->service_wq);
> +
> if (gd->adev)
> remove_adev(gd);
>
> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
> index ffa9820..3ce56a8 100644
> --- a/include/net/mana/gdma.h
> +++ b/include/net/mana/gdma.h
> @@ -60,6 +60,7 @@ enum gdma_eqe_type {
> GDMA_EQE_HWC_INIT_DONE = 131,
> GDMA_EQE_HWC_SOC_RECONFIG = 132,
> GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
> + GDMA_EQE_HWC_SOC_SERVICE = 134,
> GDMA_EQE_RNIC_QP_FATAL = 176,
> };
>
> @@ -70,6 +71,18 @@ enum {
> GDMA_DEVICE_MANA_IB = 3,
> };
>
> +enum gdma_service_type {
> + GDMA_SERVICE_TYPE_NONE = 0,
> + GDMA_SERVICE_TYPE_RDMA_SUSPEND = 1,
> + GDMA_SERVICE_TYPE_RDMA_RESUME = 2,
> +};
> +
> +struct mana_service_work {
> + struct work_struct work;
> + struct gdma_dev *gdma_dev;
> + enum gdma_service_type event;
> +};
> +
> struct gdma_resource {
> /* Protect the bitmap */
> spinlock_t lock;
> @@ -224,6 +237,8 @@ struct gdma_dev {
> void *driver_data;
>
> struct auxiliary_device *adev;
> + bool is_suspended;
> + bool rdma_teardown;
> };
>
> /* MANA_PAGE_SIZE is the DMA unit */
> @@ -409,6 +424,8 @@ struct gdma_context {
> struct gdma_dev mana_ib;
>
> u64 pf_cap_flags1;
> +
> + struct workqueue_struct *service_wq;
> };
Thanks,
Alok
Powered by blists - more mailing lists