[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130104103940.00001f49@unknown>
Date: Fri, 4 Jan 2013 10:39:40 -0800
From: Greg Rose <gregory.v.rose@...el.com>
To: Ben Hutchings <bhutchings@...arflare.com>
CC: Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
"David S. Miller" <davem@...emloft.net>, <stable@...r.kernel.org>,
<e1000-devel@...ts.sourceforge.net>, <netdev@...r.kernel.org>,
<linux-net-drivers@...arflare.com>
Subject: Re: [PATCH 3.0.y 2/3] rtnetlink: Fix problem with buffer allocation
On Fri, 4 Jan 2013 00:33:34 +0000
Ben Hutchings <bhutchings@...arflare.com> wrote:
> From: Greg Rose <gregory.v.rose@...el.com>
>
> commit 115c9b81928360d769a76c632bae62d15206a94a upstream.
>
> Implement a new netlink attribute type IFLA_EXT_MASK. The mask
> is a 32 bit value that can be used to indicate to the kernel that
> certain extended ifinfo values are requested by the user application.
> At this time the only mask value defined is RTEXT_FILTER_VF to
> indicate that the user wants the ifinfo dump to send information
> about the VFs belonging to the interface.
>
> This patch fixes a bug in which certain applications do not have
> large enough buffers to accommodate the extra information returned
> by the kernel with large numbers of SR-IOV virtual functions.
> Those applications will not send the new netlink attribute with
> the interface info dump request netlink messages so they will
> not get unexpectedly large request buffers returned by the kernel.
>
> Modifies the rtnl_calcit function to traverse the list of net
> devices and compute the minimum buffer size that can hold the
> info dumps of all matching devices based upon the filter passed
> in via the new netlink attribute filter mask. If no filter
> mask is sent then the buffer allocation defaults to NLMSG_GOODSIZE.
>
> With this change it is possible to add yet to be defined netlink
> attributes to the dump request which should make it fairly extensible
> in the future.
>
> Signed-off-by: Greg Rose <gregory.v.rose@...el.com>
> Signed-off-by: David S. Miller <davem@...emloft.net>
> [bwh: Backported to 3.0:
> - Adjust context
> - Drop the change in do_setlink() that reverts commit f18da1456581
> ('net: RTNETLINK adjusting values of min_ifinfo_dump_size'), which
> was never applied here]
> Signed-off-by: Ben Hutchings <bhutchings@...arflare.com>
Not sure if these need my ack or not, nevertheless...
Acked-by: Greg Rose <gregory.v.rose@...el.com)
And thanks for doing this work Ben.
- Greg
> ---
> include/linux/if_link.h | 1 +
> include/linux/rtnetlink.h | 3 ++
> include/net/rtnetlink.h | 2 +-
> net/core/rtnetlink.c | 77
> ++++++++++++++++++++++++++++++++++----------- 4 files changed, 63
> insertions(+), 20 deletions(-)
>
> diff --git a/include/linux/if_link.h b/include/linux/if_link.h
> index 0ee969a..61a48b5 100644
> --- a/include/linux/if_link.h
> +++ b/include/linux/if_link.h
> @@ -137,6 +137,7 @@ enum {
> IFLA_AF_SPEC,
> IFLA_GROUP, /* Group the device belongs to */
> IFLA_NET_NS_FD,
> + IFLA_EXT_MASK, /* Extended info mask, VFs,
> etc */ __IFLA_MAX
> };
>
> diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
> index bbad657..5415dfb 100644
> --- a/include/linux/rtnetlink.h
> +++ b/include/linux/rtnetlink.h
> @@ -600,6 +600,9 @@ struct tcamsg {
> #define TCA_ACT_TAB 1 /* attr type must be >=1 */
> #define TCAA_MAX 1
>
> +/* New extended info filters for IFLA_EXT_MASK */
> +#define RTEXT_FILTER_VF (1 << 0)
> +
> /* End of information exported to user level */
>
> #ifdef __KERNEL__
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
> index 678f1ff..3702939 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -6,7 +6,7 @@
>
> typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
> void *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct
> netlink_callback *); -typedef u16 (*rtnl_calcit_func)(struct sk_buff
> *); +typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct
> nlmsghdr *);
> extern int __rtnl_register(int protocol, int msgtype,
> rtnl_doit_func, rtnl_dumpit_func,
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index 848de7b..e41ce2a 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -60,7 +60,6 @@ struct rtnl_link {
> };
>
> static DEFINE_MUTEX(rtnl_mutex);
> -static u16 min_ifinfo_dump_size;
>
> void rtnl_lock(void)
> {
> @@ -727,10 +726,11 @@ static void copy_rtnl_link_stats64(void *v,
> const struct rtnl_link_stats64 *b) }
>
> /* All VF info */
> -static inline int rtnl_vfinfo_size(const struct net_device *dev)
> +static inline int rtnl_vfinfo_size(const struct net_device *dev,
> + u32 ext_filter_mask)
> {
> - if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
> -
> + if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
> + (ext_filter_mask & RTEXT_FILTER_VF)) {
> int num_vfs = dev_num_vf(dev->dev.parent);
> size_t size = nla_total_size(sizeof(struct nlattr));
> size += nla_total_size(num_vfs * sizeof(struct
> nlattr)); @@ -768,7 +768,8 @@ static size_t rtnl_port_size(const
> struct net_device *dev) return port_self_size;
> }
>
> -static noinline size_t if_nlmsg_size(const struct net_device *dev)
> +static noinline size_t if_nlmsg_size(const struct net_device *dev,
> + u32 ext_filter_mask)
> {
> return NLMSG_ALIGN(sizeof(struct ifinfomsg))
> + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
> @@ -786,8 +787,9 @@ static noinline size_t if_nlmsg_size(const struct
> net_device *dev)
> + nla_total_size(4) /* IFLA_MASTER */
> + nla_total_size(1) /* IFLA_OPERSTATE */
> + nla_total_size(1) /* IFLA_LINKMODE */
> - + nla_total_size(4) /* IFLA_NUM_VF */
> - + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
> + + nla_total_size(ext_filter_mask
> + & RTEXT_FILTER_VF ? 4 : 0) /*
> IFLA_NUM_VF */
> + + rtnl_vfinfo_size(dev, ext_filter_mask) /*
> IFLA_VFINFO_LIST */
> + rtnl_port_size(dev) /* IFLA_VF_PORTS +
> IFLA_PORT_SELF */
> + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
> + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
> @@ -870,7 +872,7 @@ static int rtnl_port_fill(struct sk_buff *skb,
> struct net_device *dev)
> static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device
> *dev, int type, u32 pid, u32 seq, u32 change,
> - unsigned int flags)
> + unsigned int flags, u32 ext_filter_mask)
> {
> struct ifinfomsg *ifm;
> struct nlmsghdr *nlh;
> @@ -943,10 +945,11 @@ static int rtnl_fill_ifinfo(struct sk_buff
> *skb, struct net_device *dev, goto nla_put_failure;
> copy_rtnl_link_stats64(nla_data(attr), stats);
>
> - if (dev->dev.parent)
> + if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF))
> NLA_PUT_U32(skb, IFLA_NUM_VF,
> dev_num_vf(dev->dev.parent));
> - if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
> + if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
> + && (ext_filter_mask & RTEXT_FILTER_VF)) {
> int i;
>
> struct nlattr *vfinfo, *vf;
> @@ -1033,11 +1036,20 @@ static int rtnl_dump_ifinfo(struct sk_buff
> *skb, struct netlink_callback *cb) struct net_device *dev;
> struct hlist_head *head;
> struct hlist_node *node;
> + struct nlattr *tb[IFLA_MAX+1];
> + u32 ext_filter_mask = 0;
>
> s_h = cb->args[0];
> s_idx = cb->args[1];
>
> rcu_read_lock();
> +
> + nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
> + ifla_policy);
> +
> + if (tb[IFLA_EXT_MASK])
> + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
> +
> for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
> idx = 0;
> head = &net->dev_index_head[h];
> @@ -1047,7 +1059,8 @@ static int rtnl_dump_ifinfo(struct sk_buff
> *skb, struct netlink_callback *cb) if (rtnl_fill_ifinfo(skb, dev,
> RTM_NEWLINK, NETLINK_CB(cb->skb).pid,
> cb->nlh->nlmsg_seq, 0,
> - NLM_F_MULTI) <= 0)
> + NLM_F_MULTI,
> + ext_filter_mask) <= 0)
> goto out;
> cont:
> idx++;
> @@ -1081,6 +1094,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1]
> = { [IFLA_VF_PORTS] = { .type = NLA_NESTED },
> [IFLA_PORT_SELF] = { .type = NLA_NESTED },
> [IFLA_AF_SPEC] = { .type = NLA_NESTED },
> + [IFLA_EXT_MASK] = { .type = NLA_U32 },
> };
> EXPORT_SYMBOL(ifla_policy);
>
> @@ -1813,6 +1827,7 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) struct net_device *dev = NULL;
> struct sk_buff *nskb;
> int err;
> + u32 ext_filter_mask = 0;
>
> err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX,
> ifla_policy); if (err < 0)
> @@ -1821,6 +1836,9 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) if (tb[IFLA_IFNAME])
> nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
>
> + if (tb[IFLA_EXT_MASK])
> + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
> +
> ifm = nlmsg_data(nlh);
> if (ifm->ifi_index > 0)
> dev = __dev_get_by_index(net, ifm->ifi_index);
> @@ -1832,12 +1850,12 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) if (dev == NULL)
> return -ENODEV;
>
> - nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
> + nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask),
> GFP_KERNEL); if (nskb == NULL)
> return -ENOBUFS;
>
> err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK,
> NETLINK_CB(skb).pid,
> - nlh->nlmsg_seq, 0, 0);
> + nlh->nlmsg_seq, 0, 0,
> ext_filter_mask); if (err < 0) {
> /* -EMSGSIZE implies BUG in if_nlmsg_size */
> WARN_ON(err == -EMSGSIZE);
> @@ -1848,8 +1866,31 @@ static int rtnl_getlink(struct sk_buff *skb,
> struct nlmsghdr* nlh, void *arg) return err;
> }
>
> -static u16 rtnl_calcit(struct sk_buff *skb)
> +static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
> {
> + struct net *net = sock_net(skb->sk);
> + struct net_device *dev;
> + struct nlattr *tb[IFLA_MAX+1];
> + u32 ext_filter_mask = 0;
> + u16 min_ifinfo_dump_size = 0;
> +
> + nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
> ifla_policy); +
> + if (tb[IFLA_EXT_MASK])
> + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
> +
> + if (!ext_filter_mask)
> + return NLMSG_GOODSIZE;
> + /*
> + * traverse the list of net devices and compute the minimum
> + * buffer size based upon the filter mask.
> + */
> + list_for_each_entry(dev, &net->dev_base_head, dev_list) {
> + min_ifinfo_dump_size = max_t(u16,
> min_ifinfo_dump_size,
> + if_nlmsg_size(dev,
> +
> ext_filter_mask));
> + }
> +
> return min_ifinfo_dump_size;
> }
>
> @@ -1884,13 +1925,11 @@ void rtmsg_ifinfo(int type, struct net_device
> *dev, unsigned change) int err = -ENOBUFS;
> size_t if_info_size;
>
> - skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)),
> GFP_KERNEL);
> + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)),
> GFP_KERNEL); if (skb == NULL)
> goto errout;
>
> - min_ifinfo_dump_size = max_t(u16, if_info_size,
> min_ifinfo_dump_size); -
> - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0);
> + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0);
> if (err < 0) {
> /* -EMSGSIZE implies BUG in if_nlmsg_size() */
> WARN_ON(err == -EMSGSIZE);
> @@ -1948,7 +1987,7 @@ static int rtnetlink_rcv_msg(struct sk_buff
> *skb, struct nlmsghdr *nlh) return -EOPNOTSUPP;
> calcit = rtnl_get_calcit(family, type);
> if (calcit)
> - min_dump_alloc = calcit(skb);
> + min_dump_alloc = calcit(skb, nlh);
>
> __rtnl_unlock();
> rtnl = net->rtnl;
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists