lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID:
 <SA6PR21MB423113D15A5BF3D0CB834EB4CE94A@SA6PR21MB4231.namprd21.prod.outlook.com>
Date: Sun, 11 May 2025 18:19:03 +0000
From: Long Li <longli@...rosoft.com>
To: Konstantin Taranov <kotaranov@...ux.microsoft.com>, Konstantin Taranov
	<kotaranov@...rosoft.com>, "pabeni@...hat.com" <pabeni@...hat.com>, Haiyang
 Zhang <haiyangz@...rosoft.com>, KY Srinivasan <kys@...rosoft.com>,
	"edumazet@...gle.com" <edumazet@...gle.com>, "kuba@...nel.org"
	<kuba@...nel.org>, "davem@...emloft.net" <davem@...emloft.net>, Dexuan Cui
	<decui@...rosoft.com>, "wei.liu@...nel.org" <wei.liu@...nel.org>,
	"jgg@...pe.ca" <jgg@...pe.ca>, "leon@...nel.org" <leon@...nel.org>
CC: "linux-rdma@...r.kernel.org" <linux-rdma@...r.kernel.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	"netdev@...r.kernel.org" <netdev@...r.kernel.org>
Subject: RE: [PATCH rdma-next v4 2/4] RDMA/mana_ib: Add support of mana_ib for
 RNIC and ETH nic



> -----Original Message-----
> From: Konstantin Taranov <kotaranov@...ux.microsoft.com>
> Sent: Wednesday, May 7, 2025 8:59 AM
> To: Konstantin Taranov <kotaranov@...rosoft.com>; pabeni@...hat.com;
> Haiyang Zhang <haiyangz@...rosoft.com>; KY Srinivasan <kys@...rosoft.com>;
> edumazet@...gle.com; kuba@...nel.org; davem@...emloft.net; Dexuan Cui
> <decui@...rosoft.com>; wei.liu@...nel.org; Long Li <longli@...rosoft.com>;
> jgg@...pe.ca; leon@...nel.org
> Cc: linux-rdma@...r.kernel.org; linux-kernel@...r.kernel.org;
> netdev@...r.kernel.org
> Subject: [PATCH rdma-next v4 2/4] RDMA/mana_ib: Add support of mana_ib for
> RNIC and ETH nic
> 
> From: Konstantin Taranov <kotaranov@...rosoft.com>
> 
> Allow mana_ib to be created over an Ethernet GDMA device and over an RNIC
> GDMA device. The HW has two devices with different capabilities and
> different use cases. Initialize the required resources depending on which
> GDMA device is used.
> 
> Signed-off-by: Konstantin Taranov <kotaranov@...rosoft.com>

Reviewed-by: Long Li <longli@...rosoft.com>

> ---
>  drivers/infiniband/hw/mana/device.c  | 174 +++++++++++++--------------
>  drivers/infiniband/hw/mana/main.c    |  55 ++++++++-
>  drivers/infiniband/hw/mana/mana_ib.h |   6 +
>  3 files changed, 138 insertions(+), 97 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mana/device.c
> b/drivers/infiniband/hw/mana/device.c
> index b310893..165c0a1 100644
> --- a/drivers/infiniband/hw/mana/device.c
> +++ b/drivers/infiniband/hw/mana/device.c
> @@ -101,103 +101,95 @@ static int mana_ib_probe(struct auxiliary_device
> *adev,
>  			 const struct auxiliary_device_id *id)  {
>  	struct mana_adev *madev = container_of(adev, struct mana_adev,
> adev);
> +	struct gdma_context *gc = madev->mdev->gdma_context;
> +	struct mana_context *mc = gc->mana.driver_data;
>  	struct gdma_dev *mdev = madev->mdev;
>  	struct net_device *ndev;
> -	struct mana_context *mc;
>  	struct mana_ib_dev *dev;
>  	u8 mac_addr[ETH_ALEN];
>  	int ret;
> 
> -	mc = mdev->driver_data;
> -
>  	dev = ib_alloc_device(mana_ib_dev, ib_dev);
>  	if (!dev)
>  		return -ENOMEM;
> 
>  	ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
> -
> -	dev->ib_dev.phys_port_cnt = mc->num_ports;
> -
> -	ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
> -		  mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
> -
>  	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
> -
> -	/*
> -	 * num_comp_vectors needs to set to the max MSIX index
> -	 * when interrupts and event queues are implemented
> -	 */
> -	dev->ib_dev.num_comp_vectors = mdev->gdma_context-
> >max_num_queues;
> -	dev->ib_dev.dev.parent = mdev->gdma_context->dev;
> -
> -	ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
> -	if (!ndev) {
> -		ret = -ENODEV;
> -		ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
> -		goto free_ib_device;
> -	}
> -	ether_addr_copy(mac_addr, ndev->dev_addr);
> -	addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
> -	ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
> -	/* mana_get_primary_netdev() returns ndev with refcount held */
> -	netdev_put(ndev, &dev->dev_tracker);
> -	if (ret) {
> -		ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
> -		goto free_ib_device;
> -	}
> -
> -	ret = mana_gd_register_device(&mdev->gdma_context->mana_ib);
> -	if (ret) {
> -		ibdev_err(&dev->ib_dev, "Failed to register device, ret %d",
> -			  ret);
> -		goto free_ib_device;
> -	}
> -	dev->gdma_dev = &mdev->gdma_context->mana_ib;
> -
> -	dev->nb.notifier_call = mana_ib_netdev_event;
> -	ret = register_netdevice_notifier(&dev->nb);
> -	if (ret) {
> -		ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
> -			  ret);
> -		goto deregister_device;
> -	}
> -
> -	ret = mana_ib_gd_query_adapter_caps(dev);
> -	if (ret) {
> -		ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
> -			  ret);
> -		goto deregister_net_notifier;
> -	}
> -
> -	ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
> -
> -	ret = mana_ib_create_eqs(dev);
> -	if (ret) {
> -		ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
> -		goto deregister_net_notifier;
> -	}
> -
> -	ret = mana_ib_gd_create_rnic_adapter(dev);
> -	if (ret)
> -		goto destroy_eqs;
> -
> +	dev->ib_dev.num_comp_vectors = gc->max_num_queues;
> +	dev->ib_dev.dev.parent = gc->dev;
> +	dev->gdma_dev = mdev;
>  	xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
> -	ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
> -	if (ret) {
> -		ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d",
> -			  ret);
> -		goto destroy_rnic;
> +
> +	if (mana_ib_is_rnic(dev)) {
> +		dev->ib_dev.phys_port_cnt = 1;
> +		ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
> +		if (!ndev) {
> +			ret = -ENODEV;
> +			ibdev_err(&dev->ib_dev, "Failed to get netdev for IB
> port 1");
> +			goto free_ib_device;
> +		}
> +		ether_addr_copy(mac_addr, ndev->dev_addr);
> +		addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev-
> >dev_addr);
> +		ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
> +		/* mana_get_primary_netdev() returns ndev with refcount held
> */
> +		netdev_put(ndev, &dev->dev_tracker);
> +		if (ret) {
> +			ibdev_err(&dev->ib_dev, "Failed to set ib netdev,
> ret %d", ret);
> +			goto free_ib_device;
> +		}
> +
> +		dev->nb.notifier_call = mana_ib_netdev_event;
> +		ret = register_netdevice_notifier(&dev->nb);
> +		if (ret) {
> +			ibdev_err(&dev->ib_dev, "Failed to register net
> notifier, %d",
> +				  ret);
> +			goto free_ib_device;
> +		}
> +
> +		ret = mana_ib_gd_query_adapter_caps(dev);
> +		if (ret) {
> +			ibdev_err(&dev->ib_dev, "Failed to query device caps,
> ret %d", ret);
> +			goto deregister_net_notifier;
> +		}
> +
> +		ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
> +
> +		ret = mana_ib_create_eqs(dev);
> +		if (ret) {
> +			ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d",
> ret);
> +			goto deregister_net_notifier;
> +		}
> +
> +		ret = mana_ib_gd_create_rnic_adapter(dev);
> +		if (ret)
> +			goto destroy_eqs;
> +
> +		ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
> +		if (ret) {
> +			ibdev_err(&dev->ib_dev, "Failed to add Mac address,
> ret %d", ret);
> +			goto destroy_rnic;
> +		}
> +	} else {
> +		dev->ib_dev.phys_port_cnt = mc->num_ports;
> +		ret = mana_eth_query_adapter_caps(dev);
> +		if (ret) {
> +			ibdev_err(&dev->ib_dev, "Failed to query ETH device
> caps, ret %d", ret);
> +			goto free_ib_device;
> +		}
>  	}
> 
> -	dev->av_pool = dma_pool_create("mana_ib_av", mdev->gdma_context-
> >dev,
> -				       MANA_AV_BUFFER_SIZE,
> MANA_AV_BUFFER_SIZE, 0);
> +	dev->av_pool = dma_pool_create("mana_ib_av", gc->dev,
> MANA_AV_BUFFER_SIZE,
> +				       MANA_AV_BUFFER_SIZE, 0);
>  	if (!dev->av_pool) {
>  		ret = -ENOMEM;
>  		goto destroy_rnic;
>  	}
> 
> -	ret = ib_register_device(&dev->ib_dev, "mana_%d",
> -				 mdev->gdma_context->dev);
> +	ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
> +		  mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
> +
> +	ret = ib_register_device(&dev->ib_dev, mana_ib_is_rnic(dev) ?
> "mana_%d" : "manae_%d",
> +				 gc->dev);
>  	if (ret)
>  		goto deallocate_pool;
> 
> @@ -208,15 +200,16 @@ static int mana_ib_probe(struct auxiliary_device
> *adev,
>  deallocate_pool:
>  	dma_pool_destroy(dev->av_pool);
>  destroy_rnic:
> -	xa_destroy(&dev->qp_table_wq);
> -	mana_ib_gd_destroy_rnic_adapter(dev);
> +	if (mana_ib_is_rnic(dev))
> +		mana_ib_gd_destroy_rnic_adapter(dev);
>  destroy_eqs:
> -	mana_ib_destroy_eqs(dev);
> +	if (mana_ib_is_rnic(dev))
> +		mana_ib_destroy_eqs(dev);
>  deregister_net_notifier:
> -	unregister_netdevice_notifier(&dev->nb);
> -deregister_device:
> -	mana_gd_deregister_device(dev->gdma_dev);
> +	if (mana_ib_is_rnic(dev))
> +		unregister_netdevice_notifier(&dev->nb);
>  free_ib_device:
> +	xa_destroy(&dev->qp_table_wq);
>  	ib_dealloc_device(&dev->ib_dev);
>  	return ret;
>  }
> @@ -227,25 +220,24 @@ static void mana_ib_remove(struct auxiliary_device
> *adev)
> 
>  	ib_unregister_device(&dev->ib_dev);
>  	dma_pool_destroy(dev->av_pool);
> +	if (mana_ib_is_rnic(dev)) {
> +		mana_ib_gd_destroy_rnic_adapter(dev);
> +		mana_ib_destroy_eqs(dev);
> +		unregister_netdevice_notifier(&dev->nb);
> +	}
>  	xa_destroy(&dev->qp_table_wq);
> -	mana_ib_gd_destroy_rnic_adapter(dev);
> -	mana_ib_destroy_eqs(dev);
> -	unregister_netdevice_notifier(&dev->nb);
> -	mana_gd_deregister_device(dev->gdma_dev);
>  	ib_dealloc_device(&dev->ib_dev);
>  }
> 
>  static const struct auxiliary_device_id mana_id_table[] = {
> -	{
> -		.name = "mana.rdma",
> -	},
> +	{ .name = "mana.rdma", },
> +	{ .name = "mana.eth", },
>  	{},
>  };
> 
>  MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
> 
>  static struct auxiliary_driver mana_driver = {
> -	.name = "rdma",
>  	.probe = mana_ib_probe,
>  	.remove = mana_ib_remove,
>  	.id_table = mana_id_table,
> diff --git a/drivers/infiniband/hw/mana/main.c
> b/drivers/infiniband/hw/mana/main.c
> index bb0f685..3837e30 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -4,6 +4,7 @@
>   */
> 
>  #include "mana_ib.h"
> +#include "linux/pci.h"
> 
>  void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
>  			 u32 port)
> @@ -551,6 +552,7 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct
> vm_area_struct *vma)  int mana_ib_get_port_immutable(struct ib_device
> *ibdev, u32 port_num,
>  			       struct ib_port_immutable *immutable)  {
> +	struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev,
> +ib_dev);
>  	struct ib_port_attr attr;
>  	int err;
> 
> @@ -560,10 +562,12 @@ int mana_ib_get_port_immutable(struct ib_device
> *ibdev, u32 port_num,
> 
>  	immutable->pkey_tbl_len = attr.pkey_tbl_len;
>  	immutable->gid_tbl_len = attr.gid_tbl_len;
> -	immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
> -	if (port_num == 1) {
> -		immutable->core_cap_flags |=
> RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
> +
> +	if (mana_ib_is_rnic(dev)) {
> +		immutable->core_cap_flags =
> RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
>  		immutable->max_mad_size = IB_MGMT_MAD_SIZE;
> +	} else {
> +		immutable->core_cap_flags =
> RDMA_CORE_PORT_RAW_PACKET;
>  	}
> 
>  	return 0;
> @@ -572,10 +576,12 @@ int mana_ib_get_port_immutable(struct ib_device
> *ibdev, u32 port_num,  int mana_ib_query_device(struct ib_device *ibdev, struct
> ib_device_attr *props,
>  			 struct ib_udata *uhw)
>  {
> -	struct mana_ib_dev *dev = container_of(ibdev,
> -			struct mana_ib_dev, ib_dev);
> +	struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev,
> ib_dev);
> +	struct pci_dev *pdev = to_pci_dev(mdev_to_gc(dev)->dev);
> 
>  	memset(props, 0, sizeof(*props));
> +	props->vendor_id = pdev->vendor;
> +	props->vendor_part_id = dev->gdma_dev->dev_id.type;
>  	props->max_mr_size = MANA_IB_MAX_MR_SIZE;
>  	props->page_size_cap = dev->adapter_caps.page_size_cap;
>  	props->max_qp = dev->adapter_caps.max_qp_count; @@ -596,6 +602,8
> @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr
> *props,
>  	props->max_ah = INT_MAX;
>  	props->max_pkeys = 1;
>  	props->local_ca_ack_delay = MANA_CA_ACK_DELAY;
> +	if (!mana_ib_is_rnic(dev))
> +		props->raw_packet_caps = IB_RAW_PACKET_CAP_IP_CSUM;
> 
>  	return 0;
>  }
> @@ -603,6 +611,7 @@ int mana_ib_query_device(struct ib_device *ibdev, struct
> ib_device_attr *props,  int mana_ib_query_port(struct ib_device *ibdev, u32 port,
>  		       struct ib_port_attr *props)
>  {
> +	struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev,
> +ib_dev);
>  	struct net_device *ndev = mana_ib_get_netdev(ibdev, port);
> 
>  	if (!ndev)
> @@ -623,7 +632,7 @@ int mana_ib_query_port(struct ib_device *ibdev, u32
> port,
>  	props->active_width = IB_WIDTH_4X;
>  	props->active_speed = IB_SPEED_EDR;
>  	props->pkey_tbl_len = 1;
> -	if (port == 1) {
> +	if (mana_ib_is_rnic(dev)) {
>  		props->gid_tbl_len = 16;
>  		props->port_cap_flags = IB_PORT_CM_SUP;
>  		props->ip_gids = true;
> @@ -703,6 +712,37 @@ int mana_ib_gd_query_adapter_caps(struct
> mana_ib_dev *dev)
>  	return 0;
>  }
> 
> +int mana_eth_query_adapter_caps(struct mana_ib_dev *dev) {
> +	struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
> +	struct gdma_query_max_resources_resp resp = {};
> +	struct gdma_general_req req = {};
> +	int err;
> +
> +	mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
> +			     sizeof(req), sizeof(resp));
> +
> +	err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), &req,
> sizeof(resp), &resp);
> +	if (err) {
> +		ibdev_err(&dev->ib_dev,
> +			  "Failed to query adapter caps err %d", err);
> +		return err;
> +	}
> +
> +	caps->max_qp_count = min_t(u32, resp.max_sq, resp.max_rq);
> +	caps->max_cq_count = resp.max_cq;
> +	caps->max_mr_count = resp.max_mst;
> +	caps->max_pd_count = 0x6000;
> +	caps->max_qp_wr = min_t(u32,
> +				0x100000 / GDMA_MAX_SQE_SIZE,
> +				0x100000 / GDMA_MAX_RQE_SIZE);
> +	caps->max_send_sge_count = 30;
> +	caps->max_recv_sge_count = 15;
> +	caps->page_size_cap = PAGE_SZ_BM;
> +
> +	return 0;
> +}
> +
>  static void
>  mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event
> *event)  { @@ -921,6 +961,9 @@ int mana_ib_gd_create_cq(struct
> mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 do
>  	struct mana_rnic_create_cq_req req = {};
>  	int err;
> 
> +	if (!mdev->eqs)
> +		return -EINVAL;
> +
>  	mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req),
> sizeof(resp));
>  	req.hdr.dev_id = gc->mana_ib.dev_id;
>  	req.adapter = mdev->adapter_handle;
> diff --git a/drivers/infiniband/hw/mana/mana_ib.h
> b/drivers/infiniband/hw/mana/mana_ib.h
> index f0dbd90..42bebd6 100644
> --- a/drivers/infiniband/hw/mana/mana_ib.h
> +++ b/drivers/infiniband/hw/mana/mana_ib.h
> @@ -544,6 +544,11 @@ static inline void mana_put_qp_ref(struct mana_ib_qp
> *qp)
>  		complete(&qp->free);
>  }
> 
> +static inline bool mana_ib_is_rnic(struct mana_ib_dev *mdev) {
> +	return mdev->gdma_dev->dev_id.type == GDMA_DEVICE_MANA_IB; }
> +
>  static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev,
> u32 port)  {
>  	struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
> ib_dev); @@ -643,6 +648,7 @@ int mana_ib_query_gid(struct ib_device *ibdev,
> u32 port, int index,  void mana_ib_disassociate_ucontext(struct ib_ucontext
> *ibcontext);
> 
>  int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev);
> +int mana_eth_query_adapter_caps(struct mana_ib_dev *mdev);
> 
>  int mana_ib_create_eqs(struct mana_ib_dev *mdev);
> 
> --
> 2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ