lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <01f27ba0-6239-4195-beda-bc3fea1a30cf@oracle.com>
Date: Mon, 12 May 2025 10:59:43 +0530
From: ALOK TIWARI <alok.a.tiwari@...cle.com>
To: Konstantin Taranov <kotaranov@...ux.microsoft.com>,
        kotaranov@...rosoft.com, pabeni@...hat.com, haiyangz@...rosoft.com,
        kys@...rosoft.com, edumazet@...gle.com, kuba@...nel.org,
        davem@...emloft.net, decui@...rosoft.com, wei.liu@...nel.org,
        longli@...rosoft.com, jgg@...pe.ca, leon@...nel.org
Cc: linux-rdma@...r.kernel.org, linux-kernel@...r.kernel.org,
        netdev@...r.kernel.org
Subject: Re: [PATCH rdma-next v4 4/4] net: mana: Add support for auxiliary
 device servicing events



On 07-05-2025 21:29, Konstantin Taranov wrote:
> From: Shiraz Saleem <shirazsaleem@...rosoft.com>
> 
> Handle soc servcing events which require the rdma auxiliary device resources to

typo servcing ->servicing

> be cleaned up during a suspend, and re-initialized during a resume.
> 
> Signed-off-by: Shiraz Saleem <shirazsaleem@...rosoft.com>
> Signed-off-by: Konstantin Taranov <kotaranov@...rosoft.com>
> ---
>   .../net/ethernet/microsoft/mana/gdma_main.c   | 11 ++-
>   .../net/ethernet/microsoft/mana/hw_channel.c  | 20 ++++++
>   drivers/net/ethernet/microsoft/mana/mana_en.c | 69 +++++++++++++++++++
>   include/net/mana/gdma.h                       | 19 +++++
>   include/net/mana/hw_channel.h                 |  9 +++
>   5 files changed, 127 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index 59e7814..3504507 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> @@ -391,6 +391,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
>   	case GDMA_EQE_HWC_INIT_EQ_ID_DB:
>   	case GDMA_EQE_HWC_INIT_DATA:
>   	case GDMA_EQE_HWC_INIT_DONE:
> +	case GDMA_EQE_HWC_SOC_SERVICE:
>   	case GDMA_EQE_RNIC_QP_FATAL:
>   		if (!eq->eq.callback)
>   			break;
> @@ -1468,10 +1469,14 @@ static int mana_gd_setup(struct pci_dev *pdev)
>   	mana_gd_init_registers(pdev);
>   	mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base);
>   
> +	gc->service_wq = alloc_ordered_workqueue("gdma_service_wq", 0);
> +	if (!gc->service_wq)
> +		return -ENOMEM;
> +
>   	err = mana_gd_setup_irqs(pdev);
>   	if (err) {
>   		dev_err(gc->dev, "Failed to setup IRQs: %d\n", err);
> -		return err;
> +		goto free_workqueue;
>   	}
>   
>   	err = mana_hwc_create_channel(gc);
> @@ -1497,6 +1502,8 @@ destroy_hwc:
>   	mana_hwc_destroy_channel(gc);
>   remove_irq:
>   	mana_gd_remove_irqs(pdev);
> +free_workqueue:
> +	destroy_workqueue(gc->service_wq);
>   	dev_err(&pdev->dev, "%s failed (error %d)\n", __func__, err);
>   	return err;
>   }
> @@ -1508,6 +1515,8 @@ static void mana_gd_cleanup(struct pci_dev *pdev)
>   	mana_hwc_destroy_channel(gc);
>   
>   	mana_gd_remove_irqs(pdev);
> +
> +	destroy_workqueue(gc->service_wq);
>   	dev_dbg(&pdev->dev, "mana gdma cleanup successful\n");
>   }
>   
> diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> index 1ba4960..60f6bc1 100644
> --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
> +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> @@ -112,11 +112,13 @@ out:
>   static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
>   					struct gdma_event *event)
>   {
> +	union hwc_init_soc_service_type service_data;
>   	struct hw_channel_context *hwc = ctx;
>   	struct gdma_dev *gd = hwc->gdma_dev;
>   	union hwc_init_type_data type_data;
>   	union hwc_init_eq_id_db eq_db;
>   	u32 type, val;
> +	int ret;
>   
>   	switch (event->type) {
>   	case GDMA_EQE_HWC_INIT_EQ_ID_DB:
> @@ -199,7 +201,25 @@ static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
>   		}
>   
>   		break;
> +	case GDMA_EQE_HWC_SOC_SERVICE:
> +		service_data.as_uint32 = event->details[0];
> +		type = service_data.type;
> +		val = service_data.value;

what is use of val?

>   
> +		switch (type) {
> +		case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
> +		case GDMA_SERVICE_TYPE_RDMA_RESUME:
> +			ret = mana_rdma_service_event(gd->gdma_context, type);
> +			if (ret)
> +				dev_err(hwc->dev, "Failed to schedule adev service event: %d\n",
> +					ret);
> +			break;
> +		default:
> +			dev_warn(hwc->dev, "Received unknown SOC service type %u\n", type);
> +			break;
> +		}
> +
> +		break;
>   	default:
>   		dev_warn(hwc->dev, "Received unknown gdma event %u\n", event->type);
>   		/* Ignore unknown events, which should never happen. */
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 2013d0e..39e01e2 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -2992,6 +2992,70 @@ idx_fail:
>   	return ret;
>   }
>   
> +static void mana_handle_rdma_servicing(struct work_struct *work)

this name does not sound clearer and more aligned with typical naming 
conventions.
Since it is an RDMA service event handler, it could be named to 
mana_rdma_service_handle. What are your thoughts on this?"

> +{
> +	struct mana_service_work *serv_work =
> +		container_of(work, struct mana_service_work, work);
> +	struct gdma_dev *gd = serv_work->gdma_dev;
> +	struct device *dev = gd->gdma_context->dev;
> +	int ret;
> +
> +	if (READ_ONCE(gd->rdma_teardown))
> +		goto out;
> +
> +	switch (serv_work->event) {
> +	case GDMA_SERVICE_TYPE_RDMA_SUSPEND:
> +		if (!gd->adev || gd->is_suspended)
> +			break;
> +
> +		remove_adev(gd);
> +		gd->is_suspended = true;
> +		break;
> +
> +	case GDMA_SERVICE_TYPE_RDMA_RESUME:
> +		if (!gd->is_suspended)
> +			break;
> +
> +		ret = add_adev(gd, "rdma");
> +		if (ret)
> +			dev_err(dev, "Failed to add adev on resume: %d\n", ret);
> +		else
> +			gd->is_suspended = false;
> +		break;
> +
> +	default:
> +		dev_warn(dev, "unknown adev service event %u\n",
> +			 serv_work->event);
> +		break;
> +	}
> +
> +out:
> +	kfree(serv_work);
> +}
> +
> +int mana_rdma_service_event(struct gdma_context *gc, enum gdma_service_type event)
> +{
> +	struct gdma_dev *gd = &gc->mana_ib;
> +	struct mana_service_work *serv_work;
> +
> +	if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
> +		/* RDMA device is not detected on pci */
> +		return 0;
> +	}
> +
> +	serv_work = kzalloc(sizeof(*serv_work), GFP_ATOMIC);
> +	if (!serv_work)
> +		return -ENOMEM;
> +
> +	serv_work->event = event;
> +	serv_work->gdma_dev = gd;
> +
> +	INIT_WORK(&serv_work->work, mana_handle_rdma_servicing);
> +	queue_work(gc->service_wq, &serv_work->work);
> +
> +	return 0;
> +}
> +
>   int mana_probe(struct gdma_dev *gd, bool resuming)
>   {
>   	struct gdma_context *gc = gd->gdma_context;
> @@ -3172,11 +3236,16 @@ int mana_rdma_probe(struct gdma_dev *gd)
>   
>   void mana_rdma_remove(struct gdma_dev *gd)
>   {
> +	struct gdma_context *gc = gd->gdma_context;
> +
>   	if (gd->dev_id.type != GDMA_DEVICE_MANA_IB) {
>   		/* RDMA device is not detected on pci */
>   		return;
>   	}
>   
> +	WRITE_ONCE(gd->rdma_teardown, true);
> +	flush_workqueue(gc->service_wq);
> +
>   	if (gd->adev)
>   		remove_adev(gd);
>   
> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
> index ffa9820..3ce56a8 100644
> --- a/include/net/mana/gdma.h
> +++ b/include/net/mana/gdma.h
> @@ -60,6 +60,7 @@ enum gdma_eqe_type {
>   	GDMA_EQE_HWC_INIT_DONE		= 131,
>   	GDMA_EQE_HWC_SOC_RECONFIG	= 132,
>   	GDMA_EQE_HWC_SOC_RECONFIG_DATA	= 133,
> +	GDMA_EQE_HWC_SOC_SERVICE	= 134,
>   	GDMA_EQE_RNIC_QP_FATAL		= 176,
>   };
>   
> @@ -70,6 +71,18 @@ enum {
>   	GDMA_DEVICE_MANA_IB	= 3,
>   };
>   
> +enum gdma_service_type {
> +	GDMA_SERVICE_TYPE_NONE		= 0,
> +	GDMA_SERVICE_TYPE_RDMA_SUSPEND	= 1,
> +	GDMA_SERVICE_TYPE_RDMA_RESUME	= 2,
> +};
> +
> +struct mana_service_work {
> +	struct work_struct work;
> +	struct gdma_dev *gdma_dev;
> +	enum gdma_service_type event;
> +};
> +
>   struct gdma_resource {
>   	/* Protect the bitmap */
>   	spinlock_t lock;
> @@ -224,6 +237,8 @@ struct gdma_dev {
>   	void *driver_data;
>   
>   	struct auxiliary_device *adev;
> +	bool is_suspended;
> +	bool rdma_teardown;
>   };
>   
>   /* MANA_PAGE_SIZE is the DMA unit */
> @@ -409,6 +424,8 @@ struct gdma_context {
>   	struct gdma_dev		mana_ib;
>   
>   	u64 pf_cap_flags1;
> +
> +	struct workqueue_struct *service_wq;
>   };

Thanks,
Alok


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ