lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <DB5PR05MB1029A3A82E19D675570A36ACDB880@DB5PR05MB1029.eurprd05.prod.outlook.com>
Date:	Mon, 14 Mar 2016 15:11:29 +0000
From:	Elad Raz <eladr@...lanox.com>
To:	Roopa Prabhu <roopa@...ulusnetworks.com>,
	"netdev@...r.kernel.org" <netdev@...r.kernel.org>
CC:	"jhs@...atatu.com" <jhs@...atatu.com>,
	"davem@...emloft.net" <davem@...emloft.net>,
	Jiri Pirko <jiri@...lanox.com>,
	Ido Schimmel <idosch@...lanox.com>
Subject: RE: [PATCH net-next 1/2] rtnetlink: add new RTM_GETSTATS message to
 dump link stats



> -----Original Message-----
> From: netdev-owner@...r.kernel.org [mailto:netdev-owner@...r.kernel.org]
> On Behalf Of Roopa Prabhu
> Sent: Sunday, March 13, 2016 3:56 AM
> To: netdev@...r.kernel.org
> Cc: jhs@...atatu.com; davem@...emloft.net
> Subject: [PATCH net-next 1/2] rtnetlink: add new RTM_GETSTATS message to
> dump link stats
> 
> From: Roopa Prabhu <roopa@...ulusnetworks.com>
> 
> This patch adds a new RTM_GETSTATS message to query link stats via
> netlink from the kernel. RTM_NEWLINK also dumps stats today, but
> RTM_NEWLINK returns a lot more than just stats and is expensive in some
> cases when frequent polling for stats from userspace is a common
> operation.
> 
> RTM_GETSTATS is an attempt to provide a light weight netlink message to
> explicity query only link stats from the kernel on an interface.
> The idea is to also keep it extensible so that new kinds of stats can be
> added to it in the future.
> 
> This patch adds the following attribute for NETDEV stats:
> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>         [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64)
> }, };
> 
> This patch also allows for af family stats (an example af stats for IPV6
> is available with the second patch in the series).
> 
> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats
> of a single interface or all interfaces with NLM_F_DUMP.
> 
> Future possible new types of stat attributes:
> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like
> bridge,
>   vlan, vxlan etc)
> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>   available via ethtool today)
> 
> This patch also declares a filter mask for all stat attributes.
> User has to provide a mask of stats attributes to query. This will be
> specified in a new hdr 'struct if_stats_msg' for stats messages.
> 
> Without any attributes in the filter_mask, no stats will be returned.
> 
> This patch has been tested with modified iproute2 ifstat.
> 
> Suggested-by: Jamal Hadi Salim <jhs@...atatu.com>
> Signed-off-by: Roopa Prabhu <roopa@...ulusnetworks.com>
> ---
>  include/net/rtnetlink.h        |   5 ++
>  include/uapi/linux/if_link.h   |  19 ++++
>  include/uapi/linux/rtnetlink.h |   7 ++
>  net/core/rtnetlink.c           | 200
> +++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 231 insertions(+)
> 
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index
> 2f87c1b..fa68158 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -131,6 +131,11 @@ struct rtnl_af_ops {
>  						    const struct nlattr *attr);
>  	int			(*set_link_af)(struct net_device *dev,
>  					       const struct nlattr *attr);
> +	size_t			(*get_link_af_stats_size)(const struct
> net_device *dev,
> +							  u32 filter_mask);
> +	int			(*fill_link_af_stats)(struct sk_buff *skb,
> +						      const struct net_device *dev,
> +						      u32 filter_mask);
>  };
> 
>  void __rtnl_af_unregister(struct rtnl_af_ops *ops); diff --git
> a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index
> 249eef9..0840f3e 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -741,4 +741,23 @@ enum {
> 
>  #define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
> 
> +/* STATS section */
> +
> +struct if_stats_msg {
> +	__u8  family;
> +	__u32 ifindex;
> +	__u32 filter_mask;
> +};
> +
> +enum {
> +	IFLA_STATS_UNSPEC,
> +	IFLA_STATS_LINK64,
> +	IFLA_STATS_INET6,
> +	__IFLA_STATS_MAX,
> +};
> +
> +#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
> +
> +#define IFLA_STATS_FILTER_BIT(ATTR)	(1 << (ATTR))
> +
>  #endif /* _UAPI_LINUX_IF_LINK_H */
> diff --git a/include/uapi/linux/rtnetlink.h
> b/include/uapi/linux/rtnetlink.h index ca764b5..2bbb300 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -139,6 +139,13 @@ enum {
>  	RTM_GETNSID = 90,
>  #define RTM_GETNSID RTM_GETNSID
> 
> +	RTM_NEWSTATS = 92,
> +#define RTM_NEWSTATS RTM_NEWSTATS

I think that RTM_NEWSTATS and RTM_DELSTATS aren't good names, since the user doesn't add or delete statistics but only queries them.
Maybe just stay with RTM_GETSTATS, and have the message sent back to the user be RTM_GETSTATS as well?

> +	RTM_DELSTATS = 93,
> +#define RTM_DELSTATS RTM_DELSTATS

This is not used.

> +	RTM_GETSTATS = 94,
> +#define RTM_GETSTATS RTM_GETSTATS
> +
>  	__RTM_MAX,
>  #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
>  };
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index
> d2d9e5e..d1e3d17 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -3410,6 +3410,203 @@ out:
>  	return err;
>  }
> 
> +static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device
> *dev,
> +			       int type, u32 pid, u32 seq, u32 change,
> +			       unsigned int flags, unsigned int filter_mask) {
> +	const struct rtnl_link_stats64 *stats;
> +	struct rtnl_link_stats64 temp;
> +	struct if_stats_msg *ifsm;
> +	struct nlmsghdr *nlh;
> +	struct rtnl_af_ops *af_ops;
> +	struct nlattr *attr;
> +
> +	ASSERT_RTNL();
> +
> +	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags);
> +	if (!nlh)
> +		return -EMSGSIZE;
> +
> +	ifsm = nlmsg_data(nlh);
> +	ifsm->ifindex = dev->ifindex;
> +	ifsm->filter_mask = filter_mask;
> +
> +	if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64)) {
> +		attr = nla_reserve(skb, IFLA_STATS_LINK64,
> +				   sizeof(struct rtnl_link_stats64));
> +		if (!attr)
> +			return -EMSGSIZE;
> +
> +		stats = dev_get_stats(dev, &temp);
> +
> +		copy_rtnl_link_stats64(nla_data(attr), stats);
> +	}
> +
> +	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
> +		if (af_ops->fill_link_af_stats) {
> +			int err;
> +
> +			err = af_ops->fill_link_af_stats(skb, dev, filter_mask);
> +			if (err < 0)
> +				goto nla_put_failure;
> +		}
> +	}
> +
> +	nlmsg_end(skb, nlh);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nlmsg_cancel(skb, nlh);
> +
> +	return -EMSGSIZE;
> +}
> +
> +static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] =
> {
> +	[IFLA_STATS_LINK64]	= { .len = sizeof(struct rtnl_link_stats64)
> },
> +};
> +
> +static size_t rtnl_link_get_af_stats_size(const struct net_device *dev,
> +					  u32 filter_mask)
> +{
> +	struct rtnl_af_ops *af_ops;
> +	size_t size = 0;
> +
> +	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
> +		if (af_ops->get_link_af_stats_size)
> +			size += af_ops->get_link_af_stats_size(dev,
> +							       filter_mask);
> +	}
> +
> +	return size;
> +}
> +
> +static noinline size_t if_nlmsg_stats_size(const struct net_device
> *dev,
> +					   u32 filter_mask)
> +{
> +	size_t size = 0;
> +
> +	if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64))
> +		size += nla_total_size(sizeof(struct rtnl_link_stats64));
> +
> +	size += rtnl_link_get_af_stats_size(dev, filter_mask);
> +
> +	return size;
> +}
> +
> +static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) {
> +	struct net *net = sock_net(skb->sk);
> +	struct if_stats_msg *ifsm;
> +	struct net_device *dev = NULL;
> +	struct sk_buff *nskb;
> +	u32 filter_mask;
> +	int err;
> +
> +	ifsm = nlmsg_data(nlh);
> +	if (ifsm->ifindex > 0)
> +		dev = __dev_get_by_index(net, ifsm->ifindex);
> +	else
> +		return -EINVAL;
> +
> +	if (!dev)
> +		return -ENODEV;
> +
> +	filter_mask = ifsm->filter_mask;
> +	if (!filter_mask)
> +		return -EINVAL;
> +
> +	nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask),
> GFP_KERNEL);
> +	if (!nskb)
> +		return -ENOBUFS;
> +
> +	err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
> +				  NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
> +				  0, filter_mask);
> +	if (err < 0) {
> +		/* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
> +		WARN_ON(err == -EMSGSIZE);
> +		kfree_skb(nskb);
> +	} else {
> +		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
> +	}
> +
> +	return err;
> +}
> +
> +static u16 rtnl_stats_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
> +{
> +	struct net *net = sock_net(skb->sk);
> +	struct net_device *dev;
> +	u16 min_ifinfo_dump_size = 0;
> +	struct if_stats_msg *ifsm;
> +	u32 filter_mask;
> +
> +	ifsm = nlmsg_data(nlh);
> +	filter_mask = ifsm->filter_mask;
> +
> +	/* traverse the list of net devices and compute the minimum
> +	 * buffer size based upon the filter mask.
> +	 */
> +	list_for_each_entry(dev, &net->dev_base_head, dev_list) {
> +		min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
> +					     if_nlmsg_stats_size(dev,
> +								 filter_mask));
> +	}
> +
> +	return min_ifinfo_dump_size;
> +}
> +
> +static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback
> +*cb) {
> +	struct net *net = sock_net(skb->sk);
> +	struct if_stats_msg *ifsm;
> +	int h, s_h;
> +	int idx = 0, s_idx;
> +	struct net_device *dev;
> +	struct hlist_head *head;
> +	unsigned int flags = NLM_F_MULTI;
> +	u32 filter_mask = 0;
> +	int err;
> +
> +	s_h = cb->args[0];
> +	s_idx = cb->args[1];
> +
> +	cb->seq = net->dev_base_seq;
> +
> +	ifsm = nlmsg_data(cb->nlh);
> +	filter_mask = ifsm->filter_mask;
> +
> +	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
> +		idx = 0;
> +		head = &net->dev_index_head[h];
> +		hlist_for_each_entry(dev, head, index_hlist) {
> +			if (idx < s_idx)
> +				goto cont;
> +			err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
> +						  NETLINK_CB(cb->skb).portid,
> +						  cb->nlh->nlmsg_seq, 0,
> +						  flags, filter_mask);
> +			/* If we ran out of room on the first message,
> +			 * we're in trouble
> +			 */
> +			WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
> +
> +			if (err < 0)
> +				goto out;
> +
> +			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
> +cont:
> +			idx++;
> +		}
> +	}
> +out:
> +	cb->args[1] = idx;
> +	cb->args[0] = h;
> +
> +	return skb->len;
> +}
> +
>  /* Process one rtnetlink message. */
> 
>  static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> @@ -3559,4 +3756,7 @@ void __init rtnetlink_init(void)
>  	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink,
> NULL);
>  	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL,
> NULL);
>  	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL,
> NULL);
> +
> +	rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get,
> rtnl_stats_dump,
> +		      rtnl_stats_calcit);
>  }
> --
> 1.9.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ