[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <DB5PR05MB1029A3A82E19D675570A36ACDB880@DB5PR05MB1029.eurprd05.prod.outlook.com>
Date: Mon, 14 Mar 2016 15:11:29 +0000
From: Elad Raz <eladr@...lanox.com>
To: Roopa Prabhu <roopa@...ulusnetworks.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>
CC: "jhs@...atatu.com" <jhs@...atatu.com>,
"davem@...emloft.net" <davem@...emloft.net>,
Jiri Pirko <jiri@...lanox.com>,
Ido Schimmel <idosch@...lanox.com>
Subject: RE: [PATCH net-next 1/2] rtnetlink: add new RTM_GETSTATS message to
dump link stats
> -----Original Message-----
> From: netdev-owner@...r.kernel.org [mailto:netdev-owner@...r.kernel.org]
> On Behalf Of Roopa Prabhu
> Sent: Sunday, March 13, 2016 3:56 AM
> To: netdev@...r.kernel.org
> Cc: jhs@...atatu.com; davem@...emloft.net
> Subject: [PATCH net-next 1/2] rtnetlink: add new RTM_GETSTATS message to
> dump link stats
>
> From: Roopa Prabhu <roopa@...ulusnetworks.com>
>
> This patch adds a new RTM_GETSTATS message to query link stats via
> netlink from the kernel. RTM_NEWLINK also dumps stats today, but
> RTM_NEWLINK returns a lot more than just stats and is expensive in some
> cases when frequent polling for stats from userspace is a common
> operation.
>
> RTM_GETSTATS is an attempt to provide a lightweight netlink message to
> explicitly query only link stats from the kernel on an interface.
> The idea is to also keep it extensible so that new kinds of stats can be
> added to it in the future.
>
> This patch adds the following attribute for NETDEV stats:
> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
> [IFLA_STATS_LINK64] = { .len = sizeof(struct rtnl_link_stats64)
> }, };
>
> This patch also allows for af family stats (an example af stats for IPV6
> is available with the second patch in the series).
>
> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats
> of a single interface or all interfaces with NLM_F_DUMP.
>
> Future possible new types of stat attributes:
> - IFLA_MPLS_STATS (nested. for mpls/mdev stats)
> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like
> bridge,
> vlan, vxlan etc)
> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
> available via ethtool today)
>
> This patch also declares a filter mask for all stat attributes.
> User has to provide a mask of stats attributes to query. This will be
> specified in a new hdr 'struct if_stats_msg' for stats messages.
>
> Without any attributes in the filter_mask, no stats will be returned.
>
> This patch has been tested with modified iproute2 ifstat.
>
> Suggested-by: Jamal Hadi Salim <jhs@...atatu.com>
> Signed-off-by: Roopa Prabhu <roopa@...ulusnetworks.com>
> ---
> include/net/rtnetlink.h | 5 ++
> include/uapi/linux/if_link.h | 19 ++++
> include/uapi/linux/rtnetlink.h | 7 ++
> net/core/rtnetlink.c | 200
> +++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 231 insertions(+)
>
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index
> 2f87c1b..fa68158 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -131,6 +131,11 @@ struct rtnl_af_ops {
> const struct nlattr *attr);
> int (*set_link_af)(struct net_device *dev,
> const struct nlattr *attr);
> + size_t (*get_link_af_stats_size)(const struct
> net_device *dev,
> + u32 filter_mask);
> + int (*fill_link_af_stats)(struct sk_buff *skb,
> + const struct net_device *dev,
> + u32 filter_mask);
> };
>
> void __rtnl_af_unregister(struct rtnl_af_ops *ops); diff --git
> a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index
> 249eef9..0840f3e 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -741,4 +741,23 @@ enum {
>
> #define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
>
> +/* STATS section */
> +
> +struct if_stats_msg {
> + __u8 family;
> + __u32 ifindex;
> + __u32 filter_mask;
> +};
> +
> +enum {
> + IFLA_STATS_UNSPEC,
> + IFLA_STATS_LINK64,
> + IFLA_STATS_INET6,
> + __IFLA_STATS_MAX,
> +};
> +
> +#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
> +
> +#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR))
> +
> #endif /* _UAPI_LINUX_IF_LINK_H */
> diff --git a/include/uapi/linux/rtnetlink.h
> b/include/uapi/linux/rtnetlink.h index ca764b5..2bbb300 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -139,6 +139,13 @@ enum {
> RTM_GETNSID = 90,
> #define RTM_GETNSID RTM_GETNSID
>
> + RTM_NEWSTATS = 92,
> +#define RTM_NEWSTATS RTM_NEWSTATS
I think that RTM_NEWSTATS and RTM_DELSTATS aren't good names, since the user doesn't add or delete statistics but only queries them.
Maybe just keep RTM_GETSTATS, and have the message sent back to the user be RTM_GETSTATS as well?
> + RTM_DELSTATS = 93,
> +#define RTM_DELSTATS RTM_DELSTATS
This is not used.
> + RTM_GETSTATS = 94,
> +#define RTM_GETSTATS RTM_GETSTATS
> +
> __RTM_MAX,
> #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
> };
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index
> d2d9e5e..d1e3d17 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -3410,6 +3410,203 @@ out:
> return err;
> }
>
> +static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device
> *dev,
> + int type, u32 pid, u32 seq, u32 change,
> + unsigned int flags, unsigned int filter_mask) {
> + const struct rtnl_link_stats64 *stats;
> + struct rtnl_link_stats64 temp;
> + struct if_stats_msg *ifsm;
> + struct nlmsghdr *nlh;
> + struct rtnl_af_ops *af_ops;
> + struct nlattr *attr;
> +
> + ASSERT_RTNL();
> +
> + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags);
> + if (!nlh)
> + return -EMSGSIZE;
> +
> + ifsm = nlmsg_data(nlh);
> + ifsm->ifindex = dev->ifindex;
> + ifsm->filter_mask = filter_mask;
> +
> + if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64)) {
> + attr = nla_reserve(skb, IFLA_STATS_LINK64,
> + sizeof(struct rtnl_link_stats64));
> + if (!attr)
> + return -EMSGSIZE;
> +
> + stats = dev_get_stats(dev, &temp);
> +
> + copy_rtnl_link_stats64(nla_data(attr), stats);
> + }
> +
> + list_for_each_entry(af_ops, &rtnl_af_ops, list) {
> + if (af_ops->fill_link_af_stats) {
> + int err;
> +
> + err = af_ops->fill_link_af_stats(skb, dev, filter_mask);
> + if (err < 0)
> + goto nla_put_failure;
> + }
> + }
> +
> + nlmsg_end(skb, nlh);
> +
> + return 0;
> +
> +nla_put_failure:
> + nlmsg_cancel(skb, nlh);
> +
> + return -EMSGSIZE;
> +}
> +
> +static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] =
> {
> + [IFLA_STATS_LINK64] = { .len = sizeof(struct rtnl_link_stats64)
> },
> +};
> +
> +static size_t rtnl_link_get_af_stats_size(const struct net_device *dev,
> + u32 filter_mask)
> +{
> + struct rtnl_af_ops *af_ops;
> + size_t size = 0;
> +
> + list_for_each_entry(af_ops, &rtnl_af_ops, list) {
> + if (af_ops->get_link_af_stats_size)
> + size += af_ops->get_link_af_stats_size(dev,
> + filter_mask);
> + }
> +
> + return size;
> +}
> +
> +static noinline size_t if_nlmsg_stats_size(const struct net_device
> *dev,
> + u32 filter_mask)
> +{
> + size_t size = 0;
> +
> + if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64))
> + size += nla_total_size(sizeof(struct rtnl_link_stats64));
> +
> + size += rtnl_link_get_af_stats_size(dev, filter_mask);
> +
> + return size;
> +}
> +
> +static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) {
> + struct net *net = sock_net(skb->sk);
> + struct if_stats_msg *ifsm;
> + struct net_device *dev = NULL;
> + struct sk_buff *nskb;
> + u32 filter_mask;
> + int err;
> +
> + ifsm = nlmsg_data(nlh);
> + if (ifsm->ifindex > 0)
> + dev = __dev_get_by_index(net, ifsm->ifindex);
> + else
> + return -EINVAL;
> +
> + if (!dev)
> + return -ENODEV;
> +
> + filter_mask = ifsm->filter_mask;
> + if (!filter_mask)
> + return -EINVAL;
> +
> + nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask),
> GFP_KERNEL);
> + if (!nskb)
> + return -ENOBUFS;
> +
> + err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
> + NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
> + 0, filter_mask);
> + if (err < 0) {
> + /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
> + WARN_ON(err == -EMSGSIZE);
> + kfree_skb(nskb);
> + } else {
> + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
> + }
> +
> + return err;
> +}
> +
> +static u16 rtnl_stats_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
> +{
> + struct net *net = sock_net(skb->sk);
> + struct net_device *dev;
> + u16 min_ifinfo_dump_size = 0;
> + struct if_stats_msg *ifsm;
> + u32 filter_mask;
> +
> + ifsm = nlmsg_data(nlh);
> + filter_mask = ifsm->filter_mask;
> +
> + /* traverse the list of net devices and compute the minimum
> + * buffer size based upon the filter mask.
> + */
> + list_for_each_entry(dev, &net->dev_base_head, dev_list) {
> + min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
> + if_nlmsg_stats_size(dev,
> + filter_mask));
> + }
> +
> + return min_ifinfo_dump_size;
> +}
> +
> +static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback
> +*cb) {
> + struct net *net = sock_net(skb->sk);
> + struct if_stats_msg *ifsm;
> + int h, s_h;
> + int idx = 0, s_idx;
> + struct net_device *dev;
> + struct hlist_head *head;
> + unsigned int flags = NLM_F_MULTI;
> + u32 filter_mask = 0;
> + int err;
> +
> + s_h = cb->args[0];
> + s_idx = cb->args[1];
> +
> + cb->seq = net->dev_base_seq;
> +
> + ifsm = nlmsg_data(cb->nlh);
> + filter_mask = ifsm->filter_mask;
> +
> + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
> + idx = 0;
> + head = &net->dev_index_head[h];
> + hlist_for_each_entry(dev, head, index_hlist) {
> + if (idx < s_idx)
> + goto cont;
> + err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
> + NETLINK_CB(cb->skb).portid,
> + cb->nlh->nlmsg_seq, 0,
> + flags, filter_mask);
> + /* If we ran out of room on the first message,
> + * we're in trouble
> + */
> + WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
> +
> + if (err < 0)
> + goto out;
> +
> + nl_dump_check_consistent(cb, nlmsg_hdr(skb));
> +cont:
> + idx++;
> + }
> + }
> +out:
> + cb->args[1] = idx;
> + cb->args[0] = h;
> +
> + return skb->len;
> +}
> +
> /* Process one rtnetlink message. */
>
> static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> @@ -3559,4 +3756,7 @@ void __init rtnetlink_init(void)
> rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink,
> NULL);
> rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL,
> NULL);
> rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL,
> NULL);
> +
> + rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get,
> rtnl_stats_dump,
> + rtnl_stats_calcit);
> }
> --
> 1.9.1
Powered by blists - more mailing lists