[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210823194230.GB1006065@nvidia.com>
Date: Mon, 23 Aug 2021 16:42:30 -0300
From: Jason Gunthorpe <jgg@...dia.com>
To: Mark Zhang <markzhang@...dia.com>
Cc: dledford@...hat.com, saeedm@...dia.com, linux-rdma@...r.kernel.org,
netdev@...r.kernel.org, aharonl@...dia.com, netao@...dia.com,
leonro@...dia.com
Subject: Re: [PATCH rdma-next 06/10] RDMA/nldev: Add support to add and
remove optional counters
On Wed, Aug 18, 2021 at 02:24:24PM +0300, Mark Zhang wrote:
> From: Aharon Landau <aharonl@...dia.com>
>
> This patch adds the ability to add/remove optional counter to a link
> through RDMA netlink. Limit it to users with ADMIN capability only.
>
> Examples:
> $ sudo rdma statistic add link rocep8s0f0/1 optional-set cc_rx_ce_pkts
> $ sudo rdma statistic remove link rocep8s0f0/1 optional-set cc_rx_ce_pkts
>
> Signed-off-by: Aharon Landau <aharonl@...dia.com>
> Signed-off-by: Neta Ostrovsky <netao@...dia.com>
> Signed-off-by: Mark Zhang <markzhang@...dia.com>
> drivers/infiniband/core/counters.c | 50 ++++++++++++++++
> drivers/infiniband/core/device.c | 2 +
> drivers/infiniband/core/nldev.c | 93 ++++++++++++++++++++++++++++++
> include/rdma/ib_verbs.h | 7 +++
> include/rdma/rdma_counter.h | 4 ++
> include/rdma/rdma_netlink.h | 1 +
> include/uapi/rdma/rdma_netlink.h | 9 +++
> 7 files changed, 166 insertions(+)
>
> diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
> index b8b6db98bfdf..fa04178aa0eb 100644
> +++ b/drivers/infiniband/core/counters.c
> @@ -106,6 +106,56 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter,
> return ret;
> }
>
> +static struct rdma_op_counter *get_opcounter(struct rdma_op_stats *opstats,
> + const char *name)
> +{
> + int i;
> +
> + for (i = 0; i < opstats->num_opcounters; i++)
> + if (!strcmp(opstats->opcounters[i].name, name))
> + return opstats->opcounters + i;
> +
> + return NULL;
> +}
Export this and have the netlink code call it instead of working with
strings.
> +static int rdma_opcounter_set(struct ib_device *dev, u32 port,
> + const char *name, bool is_add)
> +{
> + struct rdma_port_counter *port_counter;
> + struct rdma_op_counter *opc;
> + int ret;
> +
> + if (!dev->ops.add_op_stat || !dev->ops.remove_op_stat)
> + return -EOPNOTSUPP;
> +
> + port_counter = &dev->port_data[port].port_counter;
> + opc = get_opcounter(port_counter->opstats, name);
> + if (!opc)
> + return -EINVAL;
> +
> + mutex_lock(&port_counter->opstats->lock);
> + ret = is_add ? dev->ops.add_op_stat(dev, port, opc->type) :
> + dev->ops.remove_op_stat(dev, port, opc->type);
Drivers should work by indexes not types, that is how the counter API
is designed
> +int rdma_opcounter_add(struct ib_device *dev, u32 port, const char *name)
> +{
> + return rdma_opcounter_set(dev, port, name, true);
> +}
> +
> +int rdma_opcounter_remove(struct ib_device *dev, u32 port,
> + const char *name)
> +{
> + return rdma_opcounter_set(dev, port, name, false);
> +}
Just pass in the add/remove flag - all this switching between wrappers
adding the flag is ugly. Do it all the way to the driver.
> +static int nldev_stat_set_op_stat(struct sk_buff *skb,
> + struct nlmsghdr *nlh,
> + struct netlink_ext_ack *extack,
> + bool cmd_add)
> +{
> + char opcounter[RDMA_NLDEV_ATTR_OPCOUNTER_NAME_SIZE] = {};
> + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
> + struct ib_device *device;
> + struct sk_buff *msg;
> + u32 index, port;
> + int ret;
> +
> + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
> + nldev_policy, extack);
> +
> + if (ret || !tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME] ||
> + !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
> + !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
> + return -EINVAL;
> +
> + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
> + device = ib_device_get_by_index(sock_net(skb->sk), index);
> + if (!device)
> + return -EINVAL;
> +
> + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
> + if (!rdma_is_port_valid(device, port)) {
> + ret = -EINVAL;
> + goto err;
> + }
> +
> + nla_strscpy(opcounter, tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENTRY_NAME],
> + sizeof(opcounter));
> +
> + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> + if (!msg) {
> + ret = -ENOMEM;
> + goto err;
> + }
> +
> + nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
> + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
> + (cmd_add ?
> + RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER :
> + RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER)),
> + 0, 0);
> +
> + if (cmd_add)
> + ret = rdma_opcounter_add(device, port, opcounter);
> + else
> + ret = rdma_opcounter_remove(device, port, opcounter);
> + if (ret)
> + goto err_msg;
> +
> + nlmsg_end(msg, nlh);
Shouldn't the netlink message for a 'set' always return the current
value of the thing being set on return? Eg the same output that GET
would generate?
> +static int nldev_stat_add_op_stat_doit(struct sk_buff *skb,
> + struct nlmsghdr *nlh,
> + struct netlink_ext_ack *extack)
> +{
> + return nldev_stat_set_op_stat(skb, nlh, extack, true);
> +}
> +
> +static int nldev_stat_remove_op_stat_doit(struct sk_buff *skb,
> + struct nlmsghdr *nlh,
> + struct netlink_ext_ack *extack)
> +{
> + return nldev_stat_set_op_stat(skb, nlh, extack, false);
> +}
> +
> static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
> struct netlink_ext_ack *extack)
> {
> @@ -2342,6 +2427,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
> .dump = nldev_res_get_mr_raw_dumpit,
> .flags = RDMA_NL_ADMIN_PERM,
> },
> + [RDMA_NLDEV_CMD_STAT_ADD_OPCOUNTER] = {
> + .doit = nldev_stat_add_op_stat_doit,
> + .flags = RDMA_NL_ADMIN_PERM,
> + },
> + [RDMA_NLDEV_CMD_STAT_REMOVE_OPCOUNTER] = {
> + .doit = nldev_stat_remove_op_stat_doit,
> + .flags = RDMA_NL_ADMIN_PERM,
> + },
> };
And here I wonder if this is the cannonical way to manipulate lists of
strings in netlink? I'm trying to think of another case like this, did
you reference something?
Are you sure this shouldn't be done via some set on some counter
object?
Jason
Powered by blists - more mailing lists