[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240901005456.25275-3-michaelgur@nvidia.com>
Date: Sun, 1 Sep 2024 03:54:54 +0300
From: Michael Guralnik <michaelgur@...dia.com>
To: <dsahern@...il.com>, <leonro@...dia.com>
CC: <jgg@...dia.com>, <linux-rdma@...r.kernel.org>, <netdev@...r.kernel.org>,
Chiara Meiohas <cmeiohas@...dia.com>, Michael Guralnik
<michaelgur@...dia.com>
Subject: [RFC iproute2-next 2/4] rdma: Add support for rdma monitor
From: Chiara Meiohas <cmeiohas@...dia.com>
Introduce a new command for RDMA event monitoring.
This patch adds a new attribute "event_type" which describes
the event received. It also adds a new NETLINK_RDMA multicast
group; processes listening on this multicast group receive RDMA
events.
The event types supported are IB device registration/unregistration
and net device attachment/detachment.
Example output of rdma monitor and the commands which trigger
the events:
$ rdma monitor
$ rmmod mlx5_ib
[UNREGISTER] dev 3
[UNREGISTER] dev 0
$ modprobe mlx5_ib
[REGISTER] dev 4
[NETDEV_ATTACH] dev 4 port 1 netdev 4
[REGISTER] dev 5
[NETDEV_ATTACH] dev 5 port 1 netdev 5
$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
[UNREGISTER] dev 4
[REGISTER] dev 6
[NETDEV_ATTACH] dev 6 port 6 netdev 4
$ echo 4 > /sys/class/net/eth2/device/sriov_numvfs
[NETDEV_ATTACH] dev 6 port 2 netdev 7
[NETDEV_ATTACH] dev 6 port 3 netdev 8
[NETDEV_ATTACH] dev 6 port 4 netdev 9
[NETDEV_ATTACH] dev 6 port 5 netdev 10
[REGISTER] dev 7
[NETDEV_ATTACH] dev 7 port 1 netdev 11
[REGISTER] dev 8
[NETDEV_ATTACH] dev 8 port 1 netdev 12
[REGISTER] dev 9
[NETDEV_ATTACH] dev 9 port 1 netdev 13
[REGISTER] dev 10
[NETDEV_ATTACH] dev 10 port 1 netdev 14
$ echo 0 > /sys/class/net/eth2/device/sriov_numvfs
[UNREGISTER] dev 7
[UNREGISTER] dev 8
[UNREGISTER] dev 9
[UNREGISTER] dev 10
[NETDEV_DETACH] dev 6 port 2
[NETDEV_DETACH] dev 6 port 3
[NETDEV_DETACH] dev 6 port 4
[NETDEV_DETACH] dev 6 port 5
Signed-off-by: Chiara Meiohas <cmeiohas@...dia.com>
Signed-off-by: Michael Guralnik <michaelgur@...dia.com>
---
include/mnl_utils.h | 1 +
lib/mnl_utils.c | 5 ++
man/man8/rdma-monitor.8 | 51 ++++++++++++
man/man8/rdma.8 | 7 +-
rdma/Makefile | 3 +-
rdma/monitor.c | 167 ++++++++++++++++++++++++++++++++++++++++
rdma/rdma.c | 3 +-
rdma/rdma.h | 1 +
rdma/utils.c | 1 +
9 files changed, 236 insertions(+), 3 deletions(-)
create mode 100644 man/man8/rdma-monitor.8
create mode 100644 rdma/monitor.c
diff --git a/include/mnl_utils.h b/include/mnl_utils.h
index 76fe1dfe..0ddf2932 100644
--- a/include/mnl_utils.h
+++ b/include/mnl_utils.h
@@ -24,6 +24,7 @@ int mnlu_gen_socket_sndrcv(struct mnlu_gen_socket *nlg, const struct nlmsghdr *n
mnl_cb_t data_cb, void *data);
struct mnl_socket *mnlu_socket_open(int bus);
+int mnl_add_nl_group(struct mnl_socket *nl, unsigned int group);
struct nlmsghdr *mnlu_msg_prepare(void *buf, uint32_t nlmsg_type, uint16_t flags,
void *extra_header, size_t extra_header_size);
int mnlu_socket_recv_run(struct mnl_socket *nl, unsigned int seq, void *buf, size_t buf_size,
diff --git a/lib/mnl_utils.c b/lib/mnl_utils.c
index 6c8f527e..5f6671bf 100644
--- a/lib/mnl_utils.c
+++ b/lib/mnl_utils.c
@@ -35,6 +35,11 @@ err_bind:
return NULL;
}
+int mnl_add_nl_group(struct mnl_socket *nl, unsigned int group) /* bind nl to the netlink group(s) in group */
+{
+ return mnl_socket_bind(nl, group, MNL_SOCKET_AUTOPID); /* mnl_socket_bind() result is returned unchanged */
+}
+
struct nlmsghdr *mnlu_msg_prepare(void *buf, uint32_t nlmsg_type, uint16_t flags,
void *extra_header, size_t extra_header_size)
{
diff --git a/man/man8/rdma-monitor.8 b/man/man8/rdma-monitor.8
new file mode 100644
index 00000000..d445cba0
--- /dev/null
+++ b/man/man8/rdma-monitor.8
@@ -0,0 +1,51 @@
+.TH RDMA\-MONITOR 8 "22 Jul 2024" "iproute2" "Linux"
+.SH NAME
+rdma-monitor \- RDMA events monitoring
+.SH SYNOPSIS
+.sp
+.ad l
+.in +8
+.ti -8
+.B rdma
+.RI "[ " OPTIONS " ]"
+.B monitor
+.RI " { " help " }"
+.sp
+
+.ti -8
+.IR OPTIONS " := { "
+\fB\-V\fR[\fIersion\fR] }
+
+.ti -8
+.B rdma monitor
+
+.ti -8
+.B rdma monitor help
+
+.SH "DESCRIPTION"
+.SS rdma monitor - monitor RDMA device events on all RDMA devices.
+.PP
+.B rdma
+opens an RDMA Netlink socket, listens on it and dumps the event info.
+
+The event types supported are RDMA device registration/unregistration
+and net device attachment/detachment.
+
+.SH "EXAMPLES"
+.PP
+rdma monitor
+.RS 4
+Listen for events of all RDMA devices
+.RE
+.PP
+
+.SH SEE ALSO
+.BR rdma (8),
+.BR rdma-link (8),
+.BR rdma-resource (8),
+.BR rdma-system (8),
+.BR rdma-statistic (8),
+.br
+
+.SH AUTHOR
+Chiara Meiohas <cmeiohas@...dia.com>
diff --git a/man/man8/rdma.8 b/man/man8/rdma.8
index 5088b9ec..df86284d 100644
--- a/man/man8/rdma.8
+++ b/man/man8/rdma.8
@@ -19,7 +19,7 @@ rdma \- RDMA tool
.ti -8
.IR OBJECT " := { "
-.BR dev " | " link " | " resource " | " system " | " statistic " }"
+.BR dev " | " link " | " resource " | " system " | " statistic " | " monitor " }"
.sp
.ti -8
@@ -94,6 +94,10 @@ character.
.B statistic
- RDMA counter statistic related.
+.TP
+.B monitor
+- RDMA events monitor
+
.PP
The names of all objects may be written in full or
abbreviated form, for example
@@ -133,6 +137,7 @@ Exit status is 0 if command was successful or a positive integer upon failure.
.BR rdma-resource (8),
.BR rdma-system (8),
.BR rdma-statistic (8),
+.BR rdma-monitor (8),
.br
.SH REPORTING BUGS
diff --git a/rdma/Makefile b/rdma/Makefile
index 37d904a7..ed3c1c1c 100644
--- a/rdma/Makefile
+++ b/rdma/Makefile
@@ -4,7 +4,8 @@ include ../config.mk
CFLAGS += -I./include/uapi/
RDMA_OBJ = rdma.o utils.o dev.o link.o res.o res-pd.o res-mr.o res-cq.o \
- res-cmid.o res-qp.o sys.o stat.o stat-mr.o res-ctx.o res-srq.o
+ res-cmid.o res-qp.o sys.o stat.o stat-mr.o res-ctx.o res-srq.o \
+ monitor.o
TARGETS += rdma
diff --git a/rdma/monitor.c b/rdma/monitor.c
new file mode 100644
index 00000000..d74727a0
--- /dev/null
+++ b/rdma/monitor.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * monitor.c RDMA tool
+ * Authors: Chiara Meiohas <cmeiohas@...dia.com>
+ */
+
+#include "rdma.h"
+
+/* Global utils flags */
+extern int json;
+
+/*
+ * Map an RDMA netlink event type to the bracketed tag printed by the monitor.
+ *
+ * The table is keyed by the event enumerators instead of by position, so a
+ * reordered or sparse enum cannot silently pair an event with the wrong tag.
+ * Values past the end of the table, and gaps inside it, yield "[UNKNOWN]".
+ */
+static const char *event_type_to_str(uint8_t etype)
+{
+	static const char *const event_types_str[] = {
+		[RDMA_REGISTER_EVENT]		= "[REGISTER]",
+		[RDMA_UNREGISTER_EVENT]		= "[UNREGISTER]",
+		[RDMA_NETDEV_ATTACH_EVENT]	= "[NETDEV_ATTACH]",
+		[RDMA_NETDEV_DETACH_EVENT]	= "[NETDEV_DETACH]",
+	};
+
+	/* Designated initializers leave unnamed slots NULL; treat as unknown. */
+	if (etype < ARRAY_SIZE(event_types_str) && event_types_str[etype])
+		return event_types_str[etype];
+
+	return "[UNKNOWN]";
+}
+
+/*
+ * Print one device REGISTER/UNREGISTER event: the event tag followed by the
+ * RDMA device index.
+ *
+ * tb is the parsed attribute table. mon_show_cb() has already verified that
+ * RDMA_NLDEV_ATTR_EVENT_TYPE is present; the device index is checked here.
+ * Returns MNL_CB_ERROR when the device index is missing, MNL_CB_OK otherwise.
+ */
+static int mon_show_rdma_register(struct nlattr **tb)
+{
+	enum rdma_nl_event_type event;
+	uint32_t dev_index;
+
+	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX] == NULL)
+		return MNL_CB_ERROR;
+
+	dev_index = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	event = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_EVENT_TYPE]);
+
+	/* One JSON object (or one text line) per event, flushed right away. */
+	open_json_object(NULL);
+	print_string(PRINT_ANY, "event_type", "%s\t", event_type_to_str(event));
+	print_uint(PRINT_ANY, "rdma_index", "dev %u", dev_index);
+	close_json_object();
+	newline();
+	fflush(stdout);
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Print one NETDEV_ATTACH/NETDEV_DETACH event: the event tag, the RDMA device
+ * index and port, plus the net device index for attach events.
+ *
+ * tb is the parsed attribute table. mon_show_cb() has already verified that
+ * RDMA_NLDEV_ATTR_EVENT_TYPE is present. Returns MNL_CB_ERROR if a required
+ * attribute is missing, MNL_CB_OK otherwise.
+ */
+static int mon_show_netdev_association(struct nlattr **tb)
+{
+	uint32_t rdma_idx, port, net_idx;
+	enum rdma_nl_event_type etype;
+
+	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+		return MNL_CB_ERROR;
+
+	etype = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_EVENT_TYPE]);
+
+	/*
+	 * The netdev index is only read for attach events. Validate it before
+	 * anything is printed: handing a missing (NULL) attribute to
+	 * mnl_attr_get_u32() would dereference it, and bailing out later
+	 * would leave a half-printed event behind.
+	 */
+	if (etype == RDMA_NETDEV_ATTACH_EVENT &&
+	    !tb[RDMA_NLDEV_ATTR_NDEV_INDEX])
+		return MNL_CB_ERROR;
+
+	rdma_idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	port = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+
+	open_json_object(NULL);
+	print_string(PRINT_ANY, "event_type", "%s\t", event_type_to_str(etype));
+	print_uint(PRINT_ANY, "rdma_index", "dev %u", rdma_idx);
+	print_uint(PRINT_ANY, "port", " port %u", port);
+
+	if (etype == RDMA_NETDEV_ATTACH_EVENT) {
+		net_idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_NDEV_INDEX]);
+		print_uint(PRINT_ANY, "netdev_index", " netdev %u", net_idx);
+	}
+	close_json_object();
+	newline();
+	fflush(stdout);
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Netlink message callback for the monitor: parse the attributes of one
+ * notification and dispatch on its event type.
+ *
+ * nlh is the received message; data is not used. Returns the result of the
+ * matching printer, or MNL_CB_ERROR when the event type attribute is missing
+ * or its value is not one of the four handled events.
+ */
+static int mon_show_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX + 1] = {};
+	enum rdma_nl_event_type etype;
+
+	mnl_attr_parse(nlh, 0, rd_attr_cb, tb);
+	if (!tb[RDMA_NLDEV_ATTR_EVENT_TYPE])
+		return MNL_CB_ERROR;
+
+	etype = mnl_attr_get_u8(tb[RDMA_NLDEV_ATTR_EVENT_TYPE]);
+
+	/* Every arm returns, so nothing follows the switch. */
+	switch (etype) {
+	case RDMA_REGISTER_EVENT:
+	case RDMA_UNREGISTER_EVENT:
+		return mon_show_rdma_register(tb);
+	case RDMA_NETDEV_ATTACH_EVENT:
+	case RDMA_NETDEV_DETACH_EVENT:
+		return mon_show_netdev_association(tb);
+	default:
+		/*
+		 * NOTE(review): an event type this tool does not know is
+		 * reported as an error; confirm that this is the intended
+		 * behaviour should further event types be added.
+		 */
+		return MNL_CB_ERROR;
+	}
+}
+
+/*
+ * Subscribe to RDMA netlink notifications and print events until the receive
+ * loop ends.
+ *
+ * Opens a NETLINK_RDMA socket into rd->nl, joins the RDMA_NL_GROUP_NOTIFY
+ * multicast group and hands every received message to mon_show_cb().
+ *
+ * Returns 0 when the receive loop ends without error, otherwise a non-zero
+ * error value. The receive buffer, the socket and the JSON writer are
+ * released on every exit path, including the successful one.
+ */
+static int mon_show(struct rd *rd)
+{
+	unsigned int groups = 0;
+	int one = 1;
+	char *buf;
+	int err;
+
+	buf = malloc(MNL_SOCKET_BUFFER_SIZE);
+	if (!buf) {
+		/* Reported with pr_err() like every other failure below. */
+		pr_err("Buffer allocation failed\n");
+		return -ENOMEM;
+	}
+
+	rd->nl = mnl_socket_open(NETLINK_RDMA);
+	if (!rd->nl) {
+		pr_err("Failed to open NETLINK_RDMA socket. Error: %s\n",
+		       strerror(errno));
+		err = -ENODEV;
+		goto err_free;
+	}
+	/* Socket options are best effort; their results are not checked. */
+	mnl_socket_setsockopt(rd->nl, NETLINK_CAP_ACK, &one, sizeof(one));
+	mnl_socket_setsockopt(rd->nl, NETLINK_EXT_ACK, &one, sizeof(one));
+
+	groups |= nl_mgrp(RDMA_NL_GROUP_NOTIFY);
+
+	err = mnl_add_nl_group(rd->nl, groups);
+	if (err < 0) {
+		pr_err("Failed to add NETLINK_RDMA multicast group. Error: %s\n",
+		       strerror(errno));
+		goto err_close;
+	}
+	new_json_obj(json);
+
+	err = mnlu_socket_recv_run(rd->nl, 0, buf, MNL_SOCKET_BUFFER_SIZE,
+				   mon_show_cb, rd);
+	if (err)
+		pr_err("Failed to listen to rdma socket\n");
+
+	/* Success and failure share the teardown below; err is 0 on success. */
+	delete_json_obj();
+err_close:
+	mnl_socket_close(rd->nl);
+err_free:
+	free(buf);
+	return err;
+}
+
+static int mon_help(struct rd *rd) /* print the monitor usage line; rd is not used */
+{
+ pr_out("Usage: rdma monitor [ -j ]\n");
+ return 0; /* always reports success */
+}
+
+int cmd_mon(struct rd *rd) /* entry point for the "monitor" object; dispatches via rd_exec_cmd() */
+{
+ const struct rd_cmd cmds[] = {
+ { NULL, mon_show }, /* presumably the default when no sub-command is given - confirm in rd_exec_cmd() */
+ { "help", mon_help },
+ { 0 } /* zeroed entry ends the table */
+ };
+
+ return rd_exec_cmd(rd, cmds, "mon command"); /* result of rd_exec_cmd() is returned unchanged */
+}
+
diff --git a/rdma/rdma.c b/rdma/rdma.c
index 131c6b2a..253ac58b 100644
--- a/rdma/rdma.c
+++ b/rdma/rdma.c
@@ -15,7 +15,7 @@ static void help(char *name)
{
pr_out("Usage: %s [ OPTIONS ] OBJECT { COMMAND | help }\n"
" %s [ -f[orce] ] -b[atch] filename\n"
- "where OBJECT := { dev | link | resource | system | statistic | help }\n"
+ "where OBJECT := { dev | link | resource | monitor | system | statistic | help }\n"
" OPTIONS := { -V[ersion] | -d[etails] | -j[son] | -p[retty] | -r[aw]}\n", name, name);
}
@@ -35,6 +35,7 @@ static int rd_cmd(struct rd *rd, int argc, char **argv)
{ "resource", cmd_res },
{ "system", cmd_sys },
{ "statistic", cmd_stat },
+ { "monitor", cmd_mon },
{ 0 }
};
diff --git a/rdma/rdma.h b/rdma/rdma.h
index d224ec57..fb037bcf 100644
--- a/rdma/rdma.h
+++ b/rdma/rdma.h
@@ -98,6 +98,7 @@ int cmd_link(struct rd *rd);
int cmd_res(struct rd *rd);
int cmd_sys(struct rd *rd);
int cmd_stat(struct rd *rd);
+int cmd_mon(struct rd* rd);
int rd_exec_cmd(struct rd *rd, const struct rd_cmd *c, const char *str);
int rd_exec_dev(struct rd *rd, int (*cb)(struct rd *rd));
int rd_exec_require_dev(struct rd *rd, int (*cb)(struct rd *rd));
diff --git a/rdma/utils.c b/rdma/utils.c
index 4d3803b5..bc104e0f 100644
--- a/rdma/utils.c
+++ b/rdma/utils.c
@@ -477,6 +477,7 @@ static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = MNL_TYPE_U8,
[RDMA_NLDEV_ATTR_DEV_TYPE] = MNL_TYPE_U8,
[RDMA_NLDEV_ATTR_PARENT_NAME] = MNL_TYPE_STRING,
+ [RDMA_NLDEV_ATTR_EVENT_TYPE] = MNL_TYPE_U8,
};
static int rd_attr_check(const struct nlattr *attr, int *typep)
--
2.17.2
Powered by blists - more mailing lists