[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1744655329-13601-3-git-send-email-kotaranov@linux.microsoft.com>
Date: Mon, 14 Apr 2025 11:28:47 -0700
From: Konstantin Taranov <kotaranov@...ux.microsoft.com>
To: kotaranov@...rosoft.com,
pabeni@...hat.com,
haiyangz@...rosoft.com,
kys@...rosoft.com,
edumazet@...gle.com,
kuba@...nel.org,
davem@...emloft.net,
decui@...rosoft.com,
wei.liu@...nel.org,
longli@...rosoft.com,
jgg@...pe.ca,
leon@...nel.org
Cc: linux-rdma@...r.kernel.org,
linux-kernel@...r.kernel.org,
netdev@...r.kernel.org
Subject: [PATCH rdma-next 2/4] RDMA/mana_ib: Add support of mana_ib for RNIC and ETH nic
From: Konstantin Taranov <kotaranov@...rosoft.com>
Allow mana_ib to be created over ethernet gdma device and
over rnic gdma device. The HW has two devices with different
capabilities and different use-cases. Initialize required
resources depending on the used gdma device.
Signed-off-by: Konstantin Taranov <kotaranov@...rosoft.com>
---
drivers/infiniband/hw/mana/device.c | 174 +++++++++++++--------------
drivers/infiniband/hw/mana/main.c | 49 +++++++-
drivers/infiniband/hw/mana/mana_ib.h | 6 +
3 files changed, 134 insertions(+), 95 deletions(-)
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index b310893..c484bed 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -101,103 +101,95 @@ static int mana_ib_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
+ struct gdma_context *gc = madev->mdev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
struct gdma_dev *mdev = madev->mdev;
struct net_device *ndev;
- struct mana_context *mc;
struct mana_ib_dev *dev;
u8 mac_addr[ETH_ALEN];
int ret;
- mc = mdev->driver_data;
-
dev = ib_alloc_device(mana_ib_dev, ib_dev);
if (!dev)
return -ENOMEM;
ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
-
- dev->ib_dev.phys_port_cnt = mc->num_ports;
-
- ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
- mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
-
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
-
- /*
- * num_comp_vectors needs to set to the max MSIX index
- * when interrupts and event queues are implemented
- */
- dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues;
- dev->ib_dev.dev.parent = mdev->gdma_context->dev;
-
- ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
- if (!ndev) {
- ret = -ENODEV;
- ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
- goto free_ib_device;
- }
- ether_addr_copy(mac_addr, ndev->dev_addr);
- addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
- ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
- /* mana_get_primary_netdev() returns ndev with refcount held */
- netdev_put(ndev, &dev->dev_tracker);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
- goto free_ib_device;
- }
-
- ret = mana_gd_register_device(&mdev->gdma_context->mana_ib);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to register device, ret %d",
- ret);
- goto free_ib_device;
- }
- dev->gdma_dev = &mdev->gdma_context->mana_ib;
-
- dev->nb.notifier_call = mana_ib_netdev_event;
- ret = register_netdevice_notifier(&dev->nb);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
- ret);
- goto deregister_device;
- }
-
- ret = mana_ib_gd_query_adapter_caps(dev);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
- ret);
- goto deregister_net_notifier;
- }
-
- ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
-
- ret = mana_ib_create_eqs(dev);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
- goto deregister_net_notifier;
- }
-
- ret = mana_ib_gd_create_rnic_adapter(dev);
- if (ret)
- goto destroy_eqs;
-
+ dev->ib_dev.num_comp_vectors = gc->max_num_queues;
+ dev->ib_dev.dev.parent = gc->dev;
+ dev->gdma_dev = mdev;
xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
- ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d",
- ret);
- goto destroy_rnic;
+
+ if (mana_ib_is_rnic(dev)) {
+ dev->ib_dev.phys_port_cnt = 1;
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+ if (!ndev) {
+ ret = -ENODEV;
+ ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
+ goto free_ib_device;
+ }
+ ether_addr_copy(mac_addr, ndev->dev_addr);
+ addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
+ ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
+ goto free_ib_device;
+ }
+
+ dev->nb.notifier_call = mana_ib_netdev_event;
+ ret = register_netdevice_notifier(&dev->nb);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
+ ret);
+ goto free_ib_device;
+ }
+
+ ret = mana_ib_gd_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret);
+ goto deregister_net_notifier;
+ }
+
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+
+ ret = mana_ib_create_eqs(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
+ goto deregister_net_notifier;
+ }
+
+ ret = mana_ib_gd_create_rnic_adapter(dev);
+ if (ret)
+ goto destroy_eqs;
+
+ ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", ret);
+ goto destroy_rnic;
+ }
+ } else {
+ dev->ib_dev.phys_port_cnt = mc->num_ports;
+ ret = mana_eth_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query ETH device caps, ret %d", ret);
+ goto free_ib_device;
+ }
}
- dev->av_pool = dma_pool_create("mana_ib_av", mdev->gdma_context->dev,
- MANA_AV_BUFFER_SIZE, MANA_AV_BUFFER_SIZE, 0);
+ dev->av_pool = dma_pool_create("mana_ib_av", gc->dev, MANA_AV_BUFFER_SIZE,
+ MANA_AV_BUFFER_SIZE, 0);
if (!dev->av_pool) {
ret = -ENOMEM;
goto destroy_rnic;
}
- ret = ib_register_device(&dev->ib_dev, "mana_%d",
- mdev->gdma_context->dev);
+ ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
+ mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
+
+ ret = ib_register_device(&dev->ib_dev, mana_ib_is_rnic(dev) ? "mana_%d" : "manae_%d",
+ gc->dev);
if (ret)
goto deallocate_pool;
@@ -208,15 +200,16 @@ static int mana_ib_probe(struct auxiliary_device *adev,
deallocate_pool:
dma_pool_destroy(dev->av_pool);
destroy_rnic:
- xa_destroy(&dev->qp_table_wq);
- mana_ib_gd_destroy_rnic_adapter(dev);
+ if (mana_ib_is_rnic(dev))
+ mana_ib_gd_destroy_rnic_adapter(dev);
destroy_eqs:
- mana_ib_destroy_eqs(dev);
+ if (mana_ib_is_rnic(dev))
+ mana_ib_destroy_eqs(dev);
deregister_net_notifier:
- unregister_netdevice_notifier(&dev->nb);
-deregister_device:
- mana_gd_deregister_device(dev->gdma_dev);
+ if (mana_ib_is_rnic(dev))
+ unregister_netdevice_notifier(&dev->nb);
free_ib_device:
+ xa_destroy(&dev->qp_table_wq);
ib_dealloc_device(&dev->ib_dev);
return ret;
}
@@ -227,25 +220,24 @@ static void mana_ib_remove(struct auxiliary_device *adev)
ib_unregister_device(&dev->ib_dev);
dma_pool_destroy(dev->av_pool);
+ if (mana_ib_is_rnic(dev)) {
+ mana_ib_gd_destroy_rnic_adapter(dev);
+ mana_ib_destroy_eqs(dev);
+ unregister_netdevice_notifier(&dev->nb);
+ }
xa_destroy(&dev->qp_table_wq);
- mana_ib_gd_destroy_rnic_adapter(dev);
- mana_ib_destroy_eqs(dev);
- unregister_netdevice_notifier(&dev->nb);
- mana_gd_deregister_device(dev->gdma_dev);
ib_dealloc_device(&dev->ib_dev);
}
static const struct auxiliary_device_id mana_id_table[] = {
- {
- .name = "mana.rdma",
- },
+ { .name = "mana.rdma", },
+ { .name = "mana.dpdk", },
{},
};
MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
static struct auxiliary_driver mana_driver = {
- .name = "rdma",
.probe = mana_ib_probe,
.remove = mana_ib_remove,
.id_table = mana_id_table,
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index a28b712..64526b8 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -551,6 +551,7 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
struct ib_port_attr attr;
int err;
@@ -560,10 +561,12 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
- if (port_num == 1) {
- immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ if (mana_ib_is_rnic(dev)) {
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ } else {
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
}
return 0;
@@ -597,12 +600,17 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
props->max_pkeys = 1;
props->local_ca_ack_delay = MANA_CA_ACK_DELAY;
+ if (!mana_ib_is_rnic(dev)) {
+ props->raw_packet_caps = IB_RAW_PACKET_CAP_CVLAN_STRIPPING | IB_RAW_PACKET_CAP_IP_CSUM;
+ }
+
return 0;
}
int mana_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
struct net_device *ndev = mana_ib_get_netdev(ibdev, port);
if (!ndev)
@@ -623,7 +631,7 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port,
props->active_width = IB_WIDTH_4X;
props->active_speed = IB_SPEED_EDR;
props->pkey_tbl_len = 1;
- if (port == 1) {
+ if (mana_ib_is_rnic(dev)) {
props->gid_tbl_len = 16;
props->port_cap_flags = IB_PORT_CM_SUP;
props->ip_gids = true;
@@ -703,6 +711,36 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
return 0;
}
+int mana_eth_query_adapter_caps(struct mana_ib_dev *dev)
+{
+ struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
+ struct gdma_query_max_resources_resp resp = {};
+ struct gdma_general_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
+ sizeof(req), sizeof(resp));
+
+ err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&dev->ib_dev,
+ "Failed to query adapter caps err %d", err);
+ return err;
+ }
+
+ caps->max_qp_count = min_t(u32, resp.max_sq, resp.max_rq);
+ caps->max_cq_count = resp.max_cq;
+ caps->max_mr_count = resp.max_mst;
+ caps->max_pd_count = 0x6000;
+ caps->max_qp_wr = min_t(u32,
+ 0x100000 / GDMA_MAX_SQE_SIZE,
+ 0x100000 / GDMA_MAX_RQE_SIZE);
+ caps->max_send_sge_count = 30;
+ caps->max_recv_sge_count = 15;
+
+ return 0;
+}
+
static void
mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
{
@@ -921,6 +959,9 @@ int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 do
struct mana_rnic_create_cq_req req = {};
int err;
+ if (!mdev->eqs)
+ return -EINVAL;
+
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(resp));
req.hdr.dev_id = gc->mana_ib.dev_id;
req.adapter = mdev->adapter_handle;
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index f0dbd90..42bebd6 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -544,6 +544,11 @@ static inline void mana_put_qp_ref(struct mana_ib_qp *qp)
complete(&qp->free);
}
+static inline bool mana_ib_is_rnic(struct mana_ib_dev *mdev)
+{
+ return mdev->gdma_dev->dev_id.type == GDMA_DEVICE_MANA_IB;
+}
+
static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port)
{
struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
@@ -643,6 +648,7 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev);
+int mana_eth_query_adapter_caps(struct mana_ib_dev *mdev);
int mana_ib_create_eqs(struct mana_ib_dev *mdev);
--
2.43.0
Powered by blists - more mailing lists