Message-ID:
<PH7PR21MB32634CB06AFF8BFFDBC003B3CE53A@PH7PR21MB3263.namprd21.prod.outlook.com>
Date: Wed, 7 Jun 2023 21:03:07 +0000
From: Long Li <longli@...rosoft.com>
To: Wei Hu <weh@...rosoft.com>, "netdev@...r.kernel.org"
<netdev@...r.kernel.org>, "linux-hyperv@...r.kernel.org"
<linux-hyperv@...r.kernel.org>, "linux-rdma@...r.kernel.org"
<linux-rdma@...r.kernel.org>, Ajay Sharma <sharmaajay@...rosoft.com>,
"jgg@...pe.ca" <jgg@...pe.ca>, "leon@...nel.org" <leon@...nel.org>, KY
Srinivasan <kys@...rosoft.com>, Haiyang Zhang <haiyangz@...rosoft.com>,
"wei.liu@...nel.org" <wei.liu@...nel.org>, Dexuan Cui <decui@...rosoft.com>,
"davem@...emloft.net" <davem@...emloft.net>, "edumazet@...gle.com"
<edumazet@...gle.com>, "kuba@...nel.org" <kuba@...nel.org>,
"pabeni@...hat.com" <pabeni@...hat.com>, "vkuznets@...hat.com"
<vkuznets@...hat.com>, "ssengar@...ux.microsoft.com"
<ssengar@...ux.microsoft.com>, "shradhagupta@...ux.microsoft.com"
<shradhagupta@...ux.microsoft.com>
Subject: RE: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib
driver.
> Subject: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib
> driver.
>
> Add EQ interrupt support for the mana ib driver. Allocate EQs per ucontext to
> receive interrupts. Attach an EQ when a CQ is created. Call the CQ interrupt
> handler when a completion interrupt arrives. EQs are destroyed when the
> ucontext is deallocated.
>
> The change calls some public APIs in the mana ethernet driver to allocate EQs
> and other resources. The EQ processing routine is also shared by the mana
> ethernet and mana ib drivers.
>
> Co-developed-by: Ajay Sharma <sharmaajay@...rosoft.com>
> Signed-off-by: Ajay Sharma <sharmaajay@...rosoft.com>
> Signed-off-by: Wei Hu <weh@...rosoft.com>
> ---
>
> v2: Use ibdev_dbg to print error messages and return -ENOMEM
> when kzalloc fails.
>
> drivers/infiniband/hw/mana/cq.c | 32 ++++-
> drivers/infiniband/hw/mana/main.c | 87 ++++++++++++
> drivers/infiniband/hw/mana/mana_ib.h | 4 +
> drivers/infiniband/hw/mana/qp.c | 90 +++++++++++-
> .../net/ethernet/microsoft/mana/gdma_main.c | 131 ++++++++++--------
> drivers/net/ethernet/microsoft/mana/mana_en.c | 1 +
> include/net/mana/gdma.h | 9 +-
> 7 files changed, 290 insertions(+), 64 deletions(-)
>
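To double-check my reading of the new interrupt path: when a completion EQE
arrives, the shared EQ processing in gdma_main.c looks the CQ up in
gc->cq_table[] and fires its callback, which for ib CQs is now
mana_ib_cq_handler(). Roughly (a simplified sketch, not the actual
gdma_main.c code):

	/* EQ processing, on a completion event carrying cq_id */
	struct gdma_queue *gdma_cq = gc->cq_table[cq_id];

	if (gdma_cq && gdma_cq->type == GDMA_CQ && gdma_cq->cq.callback)
		gdma_cq->cq.callback(gdma_cq->cq.context, gdma_cq);

Please correct me if the cq_table entries are consumed differently.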
> diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
> index d141cab8a1e6..3cd680e0e753 100644
> --- a/drivers/infiniband/hw/mana/cq.c
> +++ b/drivers/infiniband/hw/mana/cq.c
> @@ -12,13 +12,20 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> struct ib_device *ibdev = ibcq->device;
> struct mana_ib_create_cq ucmd = {};
> struct mana_ib_dev *mdev;
> + struct gdma_context *gc;
> + struct gdma_dev *gd;
> int err;
>
> mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + gd = mdev->gdma_dev;
> + gc = gd->gdma_context;
>
> if (udata->inlen < sizeof(ucmd))
> return -EINVAL;
>
> + cq->comp_vector = attr->comp_vector > gc->max_num_queues ?
> + 0 : attr->comp_vector;
> +
> 	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
> if (err) {
> ibdev_dbg(ibdev,
> @@ -69,11 +76,32 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
> struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> struct ib_device *ibdev = ibcq->device;
> struct mana_ib_dev *mdev;
> + struct gdma_context *gc;
> + struct gdma_dev *gd;
> +
>
> mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + gd = mdev->gdma_dev;
> + gc = gd->gdma_context;
>
> - mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
> - ib_umem_release(cq->umem);
> +
> +
> + if (atomic_read(&ibcq->usecnt) == 0) {
> + mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
The return value of this call needs to be checked. The code below goes on to
kfree(gc->cq_table[cq->id]); if the CQ was not actually destroyed, an
interrupt can still arrive and use the entry while it is being freed.
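A rough sketch of what I mean (assuming mana_ib_gd_destroy_dma_region()
returns an errno-style int; whether to bail out or just skip the kfree is up
to you):

	int err;

	err = mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
	if (err) {
		/* HW may still reference the CQ; don't free the gdma cq yet */
		ibdev_dbg(ibdev, "failed to destroy dma region, err %d\n", err);
		return err;
	}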
> + ibdev_dbg(ibdev, "freeing gdma cq %p\n", gc->cq_table[cq->id]);
> + kfree(gc->cq_table[cq->id]);
> + gc->cq_table[cq->id] = NULL;
> + ib_umem_release(cq->umem);
> + }
>
> return 0;
> }
> +
> +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq)
> +{
> + struct mana_ib_cq *cq = ctx;
> + struct ib_device *ibdev = cq->ibcq.device;
> +
> + ibdev_dbg(ibdev, "Enter %s %d\n", __func__, __LINE__);
This debug message seems overkill? The handler runs on every completion
interrupt, so even a dbg-level print here is noisy.
> +	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
> +}
> diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
> index 7be4c3adb4e2..e4efbcaed10e 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -143,6 +143,81 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
> return err;
> }
>
> +static void mana_ib_destroy_eq(struct mana_ib_ucontext *ucontext,
> + struct mana_ib_dev *mdev)
> +{
> + struct gdma_context *gc = mdev->gdma_dev->gdma_context;
> + struct ib_device *ibdev = ucontext->ibucontext.device;
> + struct gdma_queue *eq;
> + int i;
> +
> + if (!ucontext->eqs)
> + return;
> +
> + for (i = 0; i < gc->max_num_queues; i++) {
> + eq = ucontext->eqs[i].eq;
> + if (!eq)
> + continue;
> +
> + mana_gd_destroy_queue(gc, eq);
> + }
> +
> + kfree(ucontext->eqs);
> + ucontext->eqs = NULL;
> +
> + ibdev_dbg(ibdev, "destroyed eq's count %d\n", gc->max_num_queues); }
Will gc->max_num_queues change after destroying an EQ? If not, this message
always prints the table capacity, not the number of EQs actually destroyed
(NULL slots are skipped above).
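A minimal sketch of counting what is actually destroyed instead (hypothetical
'destroyed' counter):

	int destroyed = 0;

	for (i = 0; i < gc->max_num_queues; i++) {
		eq = ucontext->eqs[i].eq;
		if (!eq)
			continue;

		mana_gd_destroy_queue(gc, eq);
		destroyed++;
	}

	ibdev_dbg(ibdev, "destroyed %d eqs\n", destroyed);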
> +
> +static int mana_ib_create_eq(struct mana_ib_ucontext *ucontext,
> + struct mana_ib_dev *mdev)
> +{
> + struct gdma_queue_spec spec = {};
> + struct gdma_queue *queue;
> + struct gdma_context *gc;
> + struct ib_device *ibdev;
> + struct gdma_dev *gd;
> + int err;
> + int i;
> +
> + if (!ucontext || !mdev)
> + return -EINVAL;
> +
> + ibdev = ucontext->ibucontext.device;
> + gd = mdev->gdma_dev;
> +
> + gc = gd->gdma_context;
> +
> + ucontext->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
> + GFP_KERNEL);
> + if (!ucontext->eqs)
> + return -ENOMEM;
> +
> + spec.type = GDMA_EQ;
> + spec.monitor_avl_buf = false;
> + spec.queue_size = EQ_SIZE;
> + spec.eq.callback = NULL;
> + spec.eq.context = ucontext->eqs;
> + spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
> + spec.eq.msix_allocated = true;
> +
> + for (i = 0; i < gc->max_num_queues; i++) {
> + spec.eq.msix_index = i;
> + err = mana_gd_create_mana_eq(gd, &spec, &queue);
> + if (err)
> + goto out;
> +
> + queue->eq.disable_needed = true;
> + ucontext->eqs[i].eq = queue;
> + }
> +
> + return 0;
> +
> +out:
> + ibdev_dbg(ibdev, "Failed to allocated eq err %d\n", err);
> + mana_ib_destroy_eq(ucontext, mdev);
> + return err;
> +}
> +
> static int mana_gd_destroy_doorbell_page(struct gdma_context *gc,
> int doorbell_page)
> {
> @@ -225,7 +300,17 @@ int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
>
> ucontext->doorbell = doorbell_page;
>
> + ret = mana_ib_create_eq(ucontext, mdev);
> + if (ret) {
> + ibdev_dbg(ibdev, "Failed to create eq's , ret %d\n", ret);
> + goto err;
> + }
> +
> return 0;
> +
> +err:
> + mana_gd_destroy_doorbell_page(gc, doorbell_page);
> + return ret;
> }
>
> void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
> @@ -240,6 +325,8 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
> mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> gc = mdev->gdma_dev->gdma_context;
>
> + mana_ib_destroy_eq(mana_ucontext, mdev);
> +
> ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
> if (ret)
> ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
> diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
> index 502cc8672eef..9672fa1670a5 100644
> --- a/drivers/infiniband/hw/mana/mana_ib.h
> +++ b/drivers/infiniband/hw/mana/mana_ib.h
> @@ -67,6 +67,7 @@ struct mana_ib_cq {
> int cqe;
> u64 gdma_region;
> u64 id;
> + u32 comp_vector;
> };
>
> struct mana_ib_qp {
> @@ -86,6 +87,7 @@ struct mana_ib_qp {
> struct mana_ib_ucontext {
> struct ib_ucontext ibucontext;
> u32 doorbell;
> + struct mana_eq *eqs;
> };
>
> struct mana_ib_rwq_ind_table {
> @@ -159,4 +161,6 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
>
> void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
>
> +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq);
> +
> #endif
> diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
> index 54b61930a7fd..e133d86c0875 100644
> --- a/drivers/infiniband/hw/mana/qp.c
> +++ b/drivers/infiniband/hw/mana/qp.c
> @@ -96,16 +96,20 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
> struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
> struct mana_ib_dev *mdev =
> container_of(pd->device, struct mana_ib_dev, ib_dev);
> + struct ib_ucontext *ib_ucontext = pd->uobject->context;
> struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
> struct mana_ib_create_qp_rss_resp resp = {};
> struct mana_ib_create_qp_rss ucmd = {};
> + struct mana_ib_ucontext *mana_ucontext;
> struct gdma_dev *gd = mdev->gdma_dev;
> mana_handle_t *mana_ind_table;
> struct mana_port_context *mpc;
> + struct gdma_queue *gdma_cq;
> struct mana_context *mc;
> struct net_device *ndev;
> struct mana_ib_cq *cq;
> struct mana_ib_wq *wq;
> + struct mana_eq *eq;
> unsigned int ind_tbl_size;
> struct ib_cq *ibcq;
> struct ib_wq *ibwq;
> @@ -114,6 +118,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
> int ret;
>
> mc = gd->driver_data;
> + mana_ucontext =
> + container_of(ib_ucontext, struct mana_ib_ucontext, ibucontext);
>
> if (!udata || udata->inlen < sizeof(ucmd))
> return -EINVAL;
> @@ -180,6 +186,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp,
> struct ib_pd *pd,
> for (i = 0; i < ind_tbl_size; i++) {
> struct mana_obj_spec wq_spec = {};
> struct mana_obj_spec cq_spec = {};
> +		unsigned int max_num_queues = gd->gdma_context->max_num_queues;
>
> ibwq = ind_tbl->ind_tbl[i];
> 		wq = container_of(ibwq, struct mana_ib_wq, ibwq);
> @@ -193,7 +200,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
> cq_spec.gdma_region = cq->gdma_region;
> cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
> cq_spec.modr_ctx_id = 0;
> - cq_spec.attached_eq = GDMA_CQ_NO_EQ;
> +		eq = &mana_ucontext->eqs[cq->comp_vector % max_num_queues];
> + cq_spec.attached_eq = eq->eq->id;
>
> 		ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
> 					 &wq_spec, &cq_spec, &wq->rx_object);
> @@ -207,6 +215,9 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
> wq->id = wq_spec.queue_index;
> cq->id = cq_spec.queue_index;
>
> + ibdev_dbg(&mdev->ib_dev, "attached eq id %u cq with
> id %llu\n",
> + eq->eq->id, cq->id);
> +
> 		ibdev_dbg(&mdev->ib_dev,
> 			  "ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
> 			  ret, wq->rx_object, wq->id, cq->id);
> @@ -215,6 +226,27 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
> resp.entries[i].wqid = wq->id;
>
> mana_ind_table[i] = wq->rx_object;
> +
> + if (gd->gdma_context->cq_table[cq->id] == NULL) {
> +
> + gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
> + if (!gdma_cq) {
> + ibdev_dbg(&mdev->ib_dev,
> + "failed to allocate gdma_cq\n");
> + ret = -ENOMEM;
> + goto free_cq;
> + }
> +
> + ibdev_dbg(&mdev->ib_dev, "gdma cq allocated %p\n",
> + gdma_cq);
> +
> + gdma_cq->cq.context = cq;
> + gdma_cq->type = GDMA_CQ;
> + gdma_cq->cq.callback = mana_ib_cq_handler;
> + gdma_cq->id = cq->id;
> + gd->gdma_context->cq_table[cq->id] = gdma_cq;
> + }
> +
> }
> resp.num_entries = i;
>
> @@ -224,7 +256,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
> ucmd.rx_hash_key_len,
> ucmd.rx_hash_key);
> if (ret)
> - goto fail;
> + goto free_cq;
>
> ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
> if (ret) {
> @@ -238,6 +270,23 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
>
> return 0;
>
> +free_cq:
> + {
> + int j = i;
> + u64 cqid;
> +
> + while (j-- > 0) {
> + cqid = resp.entries[j].cqid;
> + gdma_cq = gd->gdma_context->cq_table[cqid];
> + cq = gdma_cq->cq.context;
> + if (atomic_read(&cq->ibcq.usecnt) == 0) {
> + kfree(gd->gdma_context->cq_table[cqid]);
> + gd->gdma_context->cq_table[cqid] = NULL;
> + }
> + }
> +
> + }
> +
> fail:
> while (i-- > 0) {
> ibwq = ind_tbl->ind_tbl[i];
> @@ -269,10 +318,12 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
> struct mana_obj_spec wq_spec = {};
> struct mana_obj_spec cq_spec = {};
> struct mana_port_context *mpc;
> + struct gdma_queue *gdma_cq;
> struct mana_context *mc;
> struct net_device *ndev;
> struct ib_umem *umem;
> - int err;
> + struct mana_eq *eq;
> + int err, eq_vec;
> u32 port;
>
> mc = gd->driver_data;
> @@ -350,7 +401,9 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
> cq_spec.gdma_region = send_cq->gdma_region;
> cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
> cq_spec.modr_ctx_id = 0;
> - cq_spec.attached_eq = GDMA_CQ_NO_EQ;
> +	eq_vec = send_cq->comp_vector % gd->gdma_context->max_num_queues;
> + eq = &mana_ucontext->eqs[eq_vec];
> + cq_spec.attached_eq = eq->eq->id;
>
> 	err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
> 				 &cq_spec, &qp->tx_object);
> @@ -368,6 +421,26 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
> qp->sq_id = wq_spec.queue_index;
> send_cq->id = cq_spec.queue_index;
>
> + if (gd->gdma_context->cq_table[send_cq->id] == NULL) {
> +
> + gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
> + if (!gdma_cq) {
> + ibdev_dbg(&mdev->ib_dev,
> + "failed to allocate gdma_cq\n");
> + err = -ENOMEM;
> + goto err_destroy_wqobj_and_cq;
> + }
> +
> + pr_debug("gdma cq allocated %p\n", gdma_cq);
Should use ibdev_dbg here, like elsewhere in this file.
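i.e. something like the form used earlier in this patch:

	ibdev_dbg(&mdev->ib_dev, "gdma cq allocated %p\n", gdma_cq);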
Thanks,
Long