lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 4 Jan 2013 10:39:40 -0800
From:	Greg Rose <gregory.v.rose@...el.com>
To:	Ben Hutchings <bhutchings@...arflare.com>
CC:	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	"David S. Miller" <davem@...emloft.net>, <stable@...r.kernel.org>,
	<e1000-devel@...ts.sourceforge.net>, <netdev@...r.kernel.org>,
	<linux-net-drivers@...arflare.com>
Subject: Re: [PATCH 3.0.y 2/3] rtnetlink: Fix problem with buffer allocation

On Fri, 4 Jan 2013 00:33:34 +0000
Ben Hutchings <bhutchings@...arflare.com> wrote:

> From: Greg Rose <gregory.v.rose@...el.com>
> 
> commit 115c9b81928360d769a76c632bae62d15206a94a upstream.
> 
> Implement a new netlink attribute type IFLA_EXT_MASK.  The mask
> is a 32 bit value that can be used to indicate to the kernel that
> certain extended ifinfo values are requested by the user application.
> At this time the only mask value defined is RTEXT_FILTER_VF to
> indicate that the user wants the ifinfo dump to send information
> about the VFs belonging to the interface.
> 
> This patch fixes a bug in which certain applications do not have
> large enough buffers to accommodate the extra information returned
> by the kernel with large numbers of SR-IOV virtual functions.
> Those applications will not send the new netlink attribute with
> the interface info dump request netlink messages so they will
> not get unexpectedly large request buffers returned by the kernel.
> 
> Modifies the rtnl_calcit function to traverse the list of net
> devices and compute the minimum buffer size that can hold the
> info dumps of all matching devices based upon the filter passed
> in via the new netlink attribute filter mask.  If no filter
> mask is sent then the buffer allocation defaults to NLMSG_GOODSIZE.
> 
> With this change it is possible to add yet to be defined netlink
> attributes to the dump request which should make it fairly extensible
> in the future.
> 
> Signed-off-by: Greg Rose <gregory.v.rose@...el.com>
> Signed-off-by: David S. Miller <davem@...emloft.net>
> [bwh: Backported to 3.0:
>  - Adjust context
>  - Drop the change in do_setlink() that reverts commit f18da1456581
>    ('net: RTNETLINK adjusting values of min_ifinfo_dump_size'), which
>    was never applied here]
> Signed-off-by: Ben Hutchings <bhutchings@...arflare.com>

Not sure if these need my ack or not, nevertheless...

Acked-by: Greg Rose <gregory.v.rose@...el.com)

And thanks for doing this work Ben.

- Greg

> ---
>  include/linux/if_link.h   |    1 +
>  include/linux/rtnetlink.h |    3 ++
>  include/net/rtnetlink.h   |    2 +-
>  net/core/rtnetlink.c      |   77
> ++++++++++++++++++++++++++++++++++----------- 4 files changed, 63
> insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/if_link.h b/include/linux/if_link.h
> index 0ee969a..61a48b5 100644
> --- a/include/linux/if_link.h
> +++ b/include/linux/if_link.h
> @@ -137,6 +137,7 @@ enum {
>  	IFLA_AF_SPEC,
>  	IFLA_GROUP,		/* Group the device belongs to */
>  	IFLA_NET_NS_FD,
> +	IFLA_EXT_MASK,		/* Extended info mask, VFs,
> etc */ __IFLA_MAX
>  };
>  
> diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
> index bbad657..5415dfb 100644
> --- a/include/linux/rtnetlink.h
> +++ b/include/linux/rtnetlink.h
> @@ -600,6 +600,9 @@ struct tcamsg {
>  #define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>  #define TCAA_MAX 1
>  
> +/* New extended info filters for IFLA_EXT_MASK */
> +#define RTEXT_FILTER_VF		(1 << 0)
> +
>  /* End of information exported to user level */
>  
>  #ifdef __KERNEL__
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
> index 678f1ff..3702939 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -6,7 +6,7 @@
>  
>  typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
> void *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct
> netlink_callback *); -typedef u16 (*rtnl_calcit_func)(struct sk_buff
> *); +typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct
> nlmsghdr *); 
>  extern int	__rtnl_register(int protocol, int msgtype,
>  				rtnl_doit_func, rtnl_dumpit_func,
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index 848de7b..e41ce2a 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -60,7 +60,6 @@ struct rtnl_link {
>  };
>  
>  static DEFINE_MUTEX(rtnl_mutex);
> -static u16 min_ifinfo_dump_size;
>  
>  void rtnl_lock(void)
>  {
> @@ -727,10 +726,11 @@ static void copy_rtnl_link_stats64(void *v,
> const struct rtnl_link_stats64 *b) }
>  
>  /* All VF info */
> -static inline int rtnl_vfinfo_size(const struct net_device *dev)
> +static inline int rtnl_vfinfo_size(const struct net_device *dev,
> +				   u32 ext_filter_mask)
>  {
> -	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
> -
> +	if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
> +	    (ext_filter_mask & RTEXT_FILTER_VF)) {
>  		int num_vfs = dev_num_vf(dev->dev.parent);
>  		size_t size = nla_total_size(sizeof(struct nlattr));
>  		size += nla_total_size(num_vfs * sizeof(struct
> nlattr)); @@ -768,7 +768,8 @@ static size_t rtnl_port_size(const
> struct net_device *dev) return port_self_size;
>  }
>  
> -static noinline size_t if_nlmsg_size(const struct net_device *dev)
> +static noinline size_t if_nlmsg_size(const struct net_device *dev,
> +				     u32 ext_filter_mask)
>  {
>  	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
>  	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
> @@ -786,8 +787,9 @@ static noinline size_t if_nlmsg_size(const struct
> net_device *dev)
>  	       + nla_total_size(4) /* IFLA_MASTER */
>  	       + nla_total_size(1) /* IFLA_OPERSTATE */
>  	       + nla_total_size(1) /* IFLA_LINKMODE */
> -	       + nla_total_size(4) /* IFLA_NUM_VF */
> -	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
> +	       + nla_total_size(ext_filter_mask
> +			        & RTEXT_FILTER_VF ? 4 : 0) /*
> IFLA_NUM_VF */
> +	       + rtnl_vfinfo_size(dev, ext_filter_mask) /*
> IFLA_VFINFO_LIST */
>  	       + rtnl_port_size(dev) /* IFLA_VF_PORTS +
> IFLA_PORT_SELF */
>  	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
>  	       + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
> @@ -870,7 +872,7 @@ static int rtnl_port_fill(struct sk_buff *skb,
> struct net_device *dev) 
>  static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device
> *dev, int type, u32 pid, u32 seq, u32 change,
> -			    unsigned int flags)
> +			    unsigned int flags, u32 ext_filter_mask)
>  {
>  	struct ifinfomsg *ifm;
>  	struct nlmsghdr *nlh;
> @@ -943,10 +945,11 @@ static int rtnl_fill_ifinfo(struct sk_buff
> *skb, struct net_device *dev, goto nla_put_failure;
>  	copy_rtnl_link_stats64(nla_data(attr), stats);
>  
> -	if (dev->dev.parent)
> +	if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF))
>  		NLA_PUT_U32(skb, IFLA_NUM_VF,
> dev_num_vf(dev->dev.parent)); 
> -	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
> +	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
> +	    && (ext_filter_mask & RTEXT_FILTER_VF)) {
>  		int i;
>  
>  		struct nlattr *vfinfo, *vf;
> @@ -1033,11 +1036,20 @@ static int rtnl_dump_ifinfo(struct sk_buff
> *skb, struct netlink_callback *cb) struct net_device *dev;
>  	struct hlist_head *head;
>  	struct hlist_node *node;
> +	struct nlattr *tb[IFLA_MAX+1];
> +	u32 ext_filter_mask = 0;
>  
>  	s_h = cb->args[0];
>  	s_idx = cb->args[1];
>  
>  	rcu_read_lock();
> +
> +	nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
> +		    ifla_policy);
> +
> +	if (tb[IFLA_EXT_MASK])
> +		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
> +
>  	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
>  		idx = 0;
>  		head = &net->dev_index_head[h];
> @@ -1047,7 +1059,8 @@ static int rtnl_dump_ifinfo(struct sk_buff
> *skb, struct netlink_callback *cb) if (rtnl_fill_ifinfo(skb, dev,
> RTM_NEWLINK, NETLINK_CB(cb->skb).pid,
>  					     cb->nlh->nlmsg_seq, 0,
> -					     NLM_F_MULTI) <= 0)
> +					     NLM_F_MULTI,
> +					     ext_filter_mask) <= 0)
>  				goto out;
>  cont:
>  			idx++;
> @@ -1081,6 +1094,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1]
> = { [IFLA_VF_PORTS]		= { .type = NLA_NESTED },
>  	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
>  	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
> +	[IFLA_EXT_MASK]		= { .type = NLA_U32 },
>  };
>  EXPORT_SYMBOL(ifla_policy);
>  
> @@ -1813,6 +1827,7 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) struct net_device *dev = NULL;
>  	struct sk_buff *nskb;
>  	int err;
> +	u32 ext_filter_mask = 0;
>  
>  	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX,
> ifla_policy); if (err < 0)
> @@ -1821,6 +1836,9 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) if (tb[IFLA_IFNAME])
>  		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
>  
> +	if (tb[IFLA_EXT_MASK])
> +		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
> +
>  	ifm = nlmsg_data(nlh);
>  	if (ifm->ifi_index > 0)
>  		dev = __dev_get_by_index(net, ifm->ifi_index);
> @@ -1832,12 +1850,12 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) if (dev == NULL)
>  		return -ENODEV;
>  
> -	nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
> +	nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask),
> GFP_KERNEL); if (nskb == NULL)
>  		return -ENOBUFS;
>  
>  	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK,
> NETLINK_CB(skb).pid,
> -			       nlh->nlmsg_seq, 0, 0);
> +			       nlh->nlmsg_seq, 0, 0,
> ext_filter_mask); if (err < 0) {
>  		/* -EMSGSIZE implies BUG in if_nlmsg_size */
>  		WARN_ON(err == -EMSGSIZE);
> @@ -1848,8 +1866,31 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) return err;
>  }
>  
> -static u16 rtnl_calcit(struct sk_buff *skb)
> +static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
>  {
> +	struct net *net = sock_net(skb->sk);
> +	struct net_device *dev;
> +	struct nlattr *tb[IFLA_MAX+1];
> +	u32 ext_filter_mask = 0;
> +	u16 min_ifinfo_dump_size = 0;
> +
> +	nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
> ifla_policy); +
> +	if (tb[IFLA_EXT_MASK])
> +		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
> +
> +	if (!ext_filter_mask)
> +		return NLMSG_GOODSIZE;
> +	/*
> +	 * traverse the list of net devices and compute the minimum
> +	 * buffer size based upon the filter mask.
> +	 */
> +	list_for_each_entry(dev, &net->dev_base_head, dev_list) {
> +		min_ifinfo_dump_size = max_t(u16,
> min_ifinfo_dump_size,
> +					     if_nlmsg_size(dev,
> +
> ext_filter_mask));
> +	}
> +
>  	return min_ifinfo_dump_size;
>  }
>  
> @@ -1884,13 +1925,11 @@ void rtmsg_ifinfo(int type, struct net_device
> *dev, unsigned change) int err = -ENOBUFS;
>  	size_t if_info_size;
>  
> -	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)),
> GFP_KERNEL);
> +	skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)),
> GFP_KERNEL); if (skb == NULL)
>  		goto errout;
>  
> -	min_ifinfo_dump_size = max_t(u16, if_info_size,
> min_ifinfo_dump_size); -
> -	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0);
> +	err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0);
>  	if (err < 0) {
>  		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
>  		WARN_ON(err == -EMSGSIZE);
> @@ -1948,7 +1987,7 @@ static int rtnetlink_rcv_msg(struct sk_buff
> *skb, struct nlmsghdr *nlh) return -EOPNOTSUPP;
>  		calcit = rtnl_get_calcit(family, type);
>  		if (calcit)
> -			min_dump_alloc = calcit(skb);
> +			min_dump_alloc = calcit(skb, nlh);
>  
>  		__rtnl_unlock();
>  		rtnl = net->rtnl;
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists