[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID:
<SA6PR21MB423192250850A77A47E8BCBACEF72@SA6PR21MB4231.namprd21.prod.outlook.com>
Date: Wed, 5 Feb 2025 19:08:36 +0000
From: Long Li <longli@...rosoft.com>
To: Konstantin Taranov <kotaranov@...ux.microsoft.com>, Konstantin Taranov
<kotaranov@...rosoft.com>, Shiraz Saleem <shirazsaleem@...rosoft.com>,
"sharmaajay@...rosoft.com" <sharmaajay@...rosoft.com>, "jgg@...pe.ca"
<jgg@...pe.ca>, "leon@...nel.org" <leon@...nel.org>
CC: "linux-rdma@...r.kernel.org" <linux-rdma@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: RE: [PATCH rdma-next 1/1] RDMA/mana_ib: Add port statistics support
> Subject: [PATCH rdma-next 1/1] RDMA/mana_ib: Add port statistics support
>
> From: Shiraz Saleem <shirazsaleem@...rosoft.com>
>
> Implement alloc_hw_port_stats and get_hw_stats APIs to support querying
> MANA VF port level statistics from rdma stat tool.
>
> Example output from rdma stat tool:
>
> $rdma statistic show link mana_0/1 -p
> link mana_0/1
> requester_timeout 45
> requester_oos_nak 0
> requester_rnr_nak 0
> responder_rnr_nak 0
> responder_oos 0
> responder_dup_request 0
> requester_implicit_nak 0
> requester_readresp_psn_mismatch 0
> nak_inv_req 0
> nak_access_error 0
> nak_opp_error 0
> nak_inv_read 0
> responder_local_len_error 0
> requestor_local_prot_error 0
> responder_rem_access_error 0
> responder_local_qp_error 0
> responder_malformed_wqe 0
> general_hw_error 6
> requester_rnr_nak_retries_exceeded 0
> requester_retries_exceeded 5
> total_fatal_error 6
> received_cnps 0
> num_qps_congested 0
> rate_inc_events 0
> num_qps_recovered 0
> current_rate 100000
>
> Signed-off-by: Shiraz Saleem <shirazsaleem@...rosoft.com>
> Signed-off-by: Konstantin Taranov <kotaranov@...rosoft.com>
Reviewed-by: Long Li <longli@...rosoft.com>
> ---
> drivers/infiniband/hw/mana/Makefile | 2 +-
> drivers/infiniband/hw/mana/counters.c | 105 ++++++++++++++++++++++++++
> drivers/infiniband/hw/mana/counters.h | 44 +++++++++++
> drivers/infiniband/hw/mana/device.c | 7 ++
> drivers/infiniband/hw/mana/mana_ib.h | 61 ++++++++++++---
> 5 files changed, 206 insertions(+), 13 deletions(-) create mode 100644
> drivers/infiniband/hw/mana/counters.c
> create mode 100644 drivers/infiniband/hw/mana/counters.h
>
> diff --git a/drivers/infiniband/hw/mana/Makefile
> b/drivers/infiniband/hw/mana/Makefile
> index 79426e7..921c05e 100644
> --- a/drivers/infiniband/hw/mana/Makefile
> +++ b/drivers/infiniband/hw/mana/Makefile
> @@ -1,4 +1,4 @@
> # SPDX-License-Identifier: GPL-2.0-only
> obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
>
> -mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o
> +mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o
> diff --git a/drivers/infiniband/hw/mana/counters.c
> b/drivers/infiniband/hw/mana/counters.c
> new file mode 100644
> index 0000000..e533ce2
> --- /dev/null
> +++ b/drivers/infiniband/hw/mana/counters.c
> @@ -0,0 +1,105 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
> + */
> +
> +#include "counters.h"
> +
> +static const struct rdma_stat_desc mana_ib_port_stats_desc[] = {
> + [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout",
> + [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak",
> + [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak",
> + [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak",
> + [MANA_IB_RESPONDER_OOS].name = "responder_oos",
> + [MANA_IB_RESPONDER_DUP_REQUEST].name =
> "responder_dup_request",
> + [MANA_IB_REQUESTER_IMPLICIT_NAK].name =
> "requester_implicit_nak",
> + [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name =
> "requester_readresp_psn_mismatch",
> + [MANA_IB_NAK_INV_REQ].name = "nak_inv_req",
> + [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error",
> + [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error",
> + [MANA_IB_NAK_INV_READ].name = "nak_inv_read",
> + [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name =
> "responder_local_len_error",
> + [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name =
> "requestor_local_prot_error",
> + [MANA_IB_RESPONDER_REM_ACCESS_ERR].name =
> "responder_rem_access_error",
> + [MANA_IB_RESPONDER_LOCAL_QP_ERR].name =
> "responder_local_qp_error",
> + [MANA_IB_RESPONDER_MALFORMED_WQE].name =
> "responder_malformed_wqe",
> + [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error",
> + [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name =
> "requester_rnr_nak_retries_exceeded",
> + [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name =
> "requester_retries_exceeded",
> + [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error",
> + [MANA_IB_RECEIVED_CNPS].name = "received_cnps",
> + [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested",
> + [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events",
> + [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered",
> + [MANA_IB_CURRENT_RATE].name = "current_rate", };
> +
> +struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
> + u32 port_num)
> +{
> + return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc,
> +
> ARRAY_SIZE(mana_ib_port_stats_desc),
> +
> RDMA_HW_STATS_DEFAULT_LIFESPAN); }
> +
> +int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats
> *stats,
> + u32 port_num, int index)
> +{
> + struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
> + ib_dev);
> + struct mana_rnic_query_vf_cntrs_resp resp = {};
> + struct mana_rnic_query_vf_cntrs_req req = {};
> + int err;
> +
> + mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS,
> + sizeof(req), sizeof(resp));
> + req.hdr.dev_id = mdev->gdma_dev->dev_id;
> + req.adapter = mdev->adapter_handle;
> +
> + err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
> + sizeof(resp), &resp);
> + if (err) {
> + ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d",
> + err);
> + return err;
> + }
> +
> + stats->value[MANA_IB_REQUESTER_TIMEOUT] =
> resp.requester_timeout;
> + stats->value[MANA_IB_REQUESTER_OOS_NAK] =
> resp.requester_oos_nak;
> + stats->value[MANA_IB_REQUESTER_RNR_NAK] =
> resp.requester_rnr_nak;
> + stats->value[MANA_IB_RESPONDER_RNR_NAK] =
> resp.responder_rnr_nak;
> + stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos;
> + stats->value[MANA_IB_RESPONDER_DUP_REQUEST] =
> resp.responder_dup_request;
> + stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] =
> + resp.requester_implicit_nak;
> + stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] =
> +
> resp.requester_readresp_psn_mismatch;
> + stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req;
> + stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err;
> + stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err;
> + stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read;
> + stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] =
> + resp.responder_local_len_err;
> + stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] =
> + resp.requestor_local_prot_err;
> + stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] =
> + resp.responder_rem_access_err;
> + stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] =
> + resp.responder_local_qp_err;
> + stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] =
> + resp.responder_malformed_wqe;
> + stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err;
> + stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] =
> +
> resp.requester_rnr_nak_retries_exceeded;
> + stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] =
> + resp.requester_retries_exceeded;
> + stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err;
> +
> + stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps;
> + stats->value[MANA_IB_NUM_QPS_CONGESTED] =
> resp.num_qps_congested;
> + stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events;
> + stats->value[MANA_IB_NUM_QPS_RECOVERED] =
> resp.num_qps_recovered;
> + stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate;
> +
> + return ARRAY_SIZE(mana_ib_port_stats_desc);
> +}
> diff --git a/drivers/infiniband/hw/mana/counters.h
> b/drivers/infiniband/hw/mana/counters.h
> new file mode 100644
> index 0000000..7ff92d2
> --- /dev/null
> +++ b/drivers/infiniband/hw/mana/counters.h
> @@ -0,0 +1,44 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2024 Microsoft Corporation. All rights reserved.
> + */
> +
> +#ifndef _COUNTERS_H_
> +#define _COUNTERS_H_
> +
> +#include "mana_ib.h"
> +
> +enum mana_ib_port_counters {
> + MANA_IB_REQUESTER_TIMEOUT,
> + MANA_IB_REQUESTER_OOS_NAK,
> + MANA_IB_REQUESTER_RNR_NAK,
> + MANA_IB_RESPONDER_RNR_NAK,
> + MANA_IB_RESPONDER_OOS,
> + MANA_IB_RESPONDER_DUP_REQUEST,
> + MANA_IB_REQUESTER_IMPLICIT_NAK,
> + MANA_IB_REQUESTER_READRESP_PSN_MISMATCH,
> + MANA_IB_NAK_INV_REQ,
> + MANA_IB_NAK_ACCESS_ERR,
> + MANA_IB_NAK_OPP_ERR,
> + MANA_IB_NAK_INV_READ,
> + MANA_IB_RESPONDER_LOCAL_LEN_ERR,
> + MANA_IB_REQUESTOR_LOCAL_PROT_ERR,
> + MANA_IB_RESPONDER_REM_ACCESS_ERR,
> + MANA_IB_RESPONDER_LOCAL_QP_ERR,
> + MANA_IB_RESPONDER_MALFORMED_WQE,
> + MANA_IB_GENERAL_HW_ERR,
> + MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED,
> + MANA_IB_REQUESTER_RETRIES_EXCEEDED,
> + MANA_IB_TOTAL_FATAL_ERR,
> + MANA_IB_RECEIVED_CNPS,
> + MANA_IB_NUM_QPS_CONGESTED,
> + MANA_IB_RATE_INC_EVENTS,
> + MANA_IB_NUM_QPS_RECOVERED,
> + MANA_IB_CURRENT_RATE,
> +};
> +
> +struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
> + u32 port_num);
> +int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats
> *stats,
> + u32 port_num, int index);
> +#endif /* _COUNTERS_H_ */
> diff --git a/drivers/infiniband/hw/mana/device.c
> b/drivers/infiniband/hw/mana/device.c
> index 97502bc..fd8efc9 100644
> --- a/drivers/infiniband/hw/mana/device.c
> +++ b/drivers/infiniband/hw/mana/device.c
> @@ -59,6 +59,11 @@ static const struct ib_device_ops mana_ib_dev_ops = {
> ib_ind_table),
> };
>
> +static const struct ib_device_ops mana_ib_stats_ops = {
> + .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats,
> + .get_hw_stats = mana_ib_get_hw_stats,
> +};
> +
> static int mana_ib_probe(struct auxiliary_device *adev,
> const struct auxiliary_device_id *id) { @@ -124,6
> +129,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
> goto deregister_device;
> }
>
> + ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
> +
> ret = mana_ib_create_eqs(dev);
> if (ret) {
> ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret); diff
> --git a/drivers/infiniband/hw/mana/mana_ib.h
> b/drivers/infiniband/hw/mana/mana_ib.h
> index cd771af..4660dab 100644
> --- a/drivers/infiniband/hw/mana/mana_ib.h
> +++ b/drivers/infiniband/hw/mana/mana_ib.h
> @@ -15,6 +15,7 @@
>
> #include <net/mana/mana.h>
> #include "shadow_queue.h"
> +#include "counters.h"
>
> #define PAGE_SZ_BM \
> (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
> @@ -192,18 +193,19 @@ struct mana_ib_rwq_ind_table { };
>
> enum mana_ib_command_code {
> - MANA_IB_GET_ADAPTER_CAP = 0x30001,
> - MANA_IB_CREATE_ADAPTER = 0x30002,
> - MANA_IB_DESTROY_ADAPTER = 0x30003,
> - MANA_IB_CONFIG_IP_ADDR = 0x30004,
> - MANA_IB_CONFIG_MAC_ADDR = 0x30005,
> - MANA_IB_CREATE_UD_QP = 0x30006,
> - MANA_IB_DESTROY_UD_QP = 0x30007,
> - MANA_IB_CREATE_CQ = 0x30008,
> - MANA_IB_DESTROY_CQ = 0x30009,
> - MANA_IB_CREATE_RC_QP = 0x3000a,
> - MANA_IB_DESTROY_RC_QP = 0x3000b,
> - MANA_IB_SET_QP_STATE = 0x3000d,
> + MANA_IB_GET_ADAPTER_CAP = 0x30001,
> + MANA_IB_CREATE_ADAPTER = 0x30002,
> + MANA_IB_DESTROY_ADAPTER = 0x30003,
> + MANA_IB_CONFIG_IP_ADDR = 0x30004,
> + MANA_IB_CONFIG_MAC_ADDR = 0x30005,
> + MANA_IB_CREATE_UD_QP = 0x30006,
> + MANA_IB_DESTROY_UD_QP = 0x30007,
> + MANA_IB_CREATE_CQ = 0x30008,
> + MANA_IB_DESTROY_CQ = 0x30009,
> + MANA_IB_CREATE_RC_QP = 0x3000a,
> + MANA_IB_DESTROY_RC_QP = 0x3000b,
> + MANA_IB_SET_QP_STATE = 0x3000d,
> + MANA_IB_QUERY_VF_COUNTERS = 0x30022,
> };
>
> struct mana_ib_query_adapter_caps_req { @@ -466,6 +468,41 @@ struct
> mana_rdma_cqe {
> };
> }; /* HW DATA */
>
> +struct mana_rnic_query_vf_cntrs_req {
> + struct gdma_req_hdr hdr;
> + mana_handle_t adapter;
> +}; /* HW Data */
> +
> +struct mana_rnic_query_vf_cntrs_resp {
> + struct gdma_resp_hdr hdr;
> + u64 requester_timeout;
> + u64 requester_oos_nak;
> + u64 requester_rnr_nak;
> + u64 responder_rnr_nak;
> + u64 responder_oos;
> + u64 responder_dup_request;
> + u64 requester_implicit_nak;
> + u64 requester_readresp_psn_mismatch;
> + u64 nak_inv_req;
> + u64 nak_access_err;
> + u64 nak_opp_err;
> + u64 nak_inv_read;
> + u64 responder_local_len_err;
> + u64 requestor_local_prot_err;
> + u64 responder_rem_access_err;
> + u64 responder_local_qp_err;
> + u64 responder_malformed_wqe;
> + u64 general_hw_err;
> + u64 requester_rnr_nak_retries_exceeded;
> + u64 requester_retries_exceeded;
> + u64 total_fatal_err;
> + u64 received_cnps;
> + u64 num_qps_congested;
> + u64 rate_inc_events;
> + u64 num_qps_recovered;
> + u64 current_rate;
> +}; /* HW Data */
> +
> static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev) {
> return mdev->gdma_dev->gdma_context;
> --
> 2.43.0
Powered by blists - more mailing lists