Date:	Thu, 17 Nov 2011 19:17:28 -0800
From:	John Fastabend <john.r.fastabend@...el.com>
To:	Neil Horman <nhorman@...driver.com>
CC:	"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
	"Love, Robert W" <robert.w.love@...el.com>,
	"David S. Miller" <davem@...emloft.net>
Subject: Re: [PATCH 1/2] net: add network priority cgroup infrastructure (v2)

On 11/17/2011 1:47 PM, Neil Horman wrote:
> This patch adds the infrastructure code to create the network priority
> cgroup.  The cgroup, in addition to the standard processes file, creates two
> control files:
> 
> 1) prioidx - This is a read-only file that exports the index of this cgroup.
> This is a value that is both arbitrary and unique to a cgroup in this subsystem,
> and is used to index the per-device priority map
> 
> 2) priomap - This is a writeable file.  On read it reports a table of 2-tuples
> <name:priority>, where name is the name of a network interface and priority
> indicates the priority assigned to frames egressing on the named interface and
> originating from a pid in this cgroup
> 
> This cgroup allows for skb priority to be set prior to a root qdisc getting
> selected. This is beneficial for DCB enabled systems, in that it allows any
> application to use DCB-configured priorities without application modification
> 
> Signed-off-by: Neil Horman <nhorman@...driver.com>
> Signed-off-by: John Fastabend <john.r.fastabend@...el.com>
> CC: Robert Love <robert.w.love@...el.com>
> CC: "David S. Miller" <davem@...emloft.net>
> ---

One more nit... can we convert the rcu_dereference() calls into
rtnl_dereference() where relevant?

/**
 * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL
 * @p: The pointer to read, prior to dereferencing
 *
 * Return the value of the specified RCU-protected pointer, but omit
 * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
 * caller holds RTNL.
 */
#define rtnl_dereference(p)                                     \
        rcu_dereference_protected(p, lockdep_rtnl_is_held())
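
For context, the win here is the lockdep annotation: with CONFIG_PROVE_RCU,
rtnl_dereference() will complain if the RTNL is not actually held at the
call site, whereas a plain rcu_dereference() under rtnl_lock() but outside
rcu_read_lock() can trigger a suspicious-RCU-usage warning even though the
access is safe. Hypothetical snippet, not from the patch:

        struct netprio_map *map;

        rtnl_lock();
        map = rtnl_dereference(dev->priomap);  /* OK: RTNL held, no splat */
        rtnl_unlock();

        map = rtnl_dereference(dev->priomap);  /* lockdep warns: RTNL not held */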

[...]

> +
> +static void extend_netdev_table(struct net_device *dev, u32 new_len)
> +{
> +       size_t new_size = sizeof(struct netprio_map) +
> +                          ((sizeof(u32) * new_len));
> +       struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
> +       struct netprio_map *old_priomap;
> +       int i;
> +
> +       old_priomap  = rcu_dereference(dev->priomap);
> +

This could be rtnl_dereference(dev->priomap) to annotate that we always
have the rtnl lock here.
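
i.e. (untested, same behaviour, just adds the lockdep annotation):

        old_priomap = rtnl_dereference(dev->priomap);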

> +       if (!new_priomap) {
> +               printk(KERN_WARNING "Unable to alloc new priomap!\n");
> +               return;
> +       }
> +
> +       for (i = 0;
> +            old_priomap && (i < old_priomap->priomap_len);
> +            i++)
> +               new_priomap->priomap[i] = old_priomap->priomap[i];
> +
> +       new_priomap->priomap_len = new_len;
> +
> +       rcu_assign_pointer(dev->priomap, new_priomap);
> +       if (old_priomap)
> +               kfree_rcu(old_priomap, rcu);
> +}
> +
> +static void update_netdev_tables(void)
> +{
> +       struct net_device *dev;
> +       u32 max_len = atomic_read(&max_prioidx);
> +       struct netprio_map *map;
> +
> +       rtnl_lock();
          ^^^^^^^^^^^
> +
> +
> +       for_each_netdev(&init_net, dev) {
> +               map = rcu_dereference(dev->priomap);

same here rtnl_dereference(dev->priomap);

> +               if ((!map) ||
> +                   (map->priomap_len < max_len))
> +                       extend_netdev_table(dev, max_len);
> +       }
> +
> +       rtnl_unlock();
> +}
> +
> +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
> +                                                struct cgroup *cgrp)
> +{
> +       struct cgroup_netprio_state *cs;
> +       int ret;
> +
> +       cs = kzalloc(sizeof(*cs), GFP_KERNEL);
> +       if (!cs)
> +               return ERR_PTR(-ENOMEM);
> +
> +       if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
> +               kfree(cs);
> +               return ERR_PTR(-EINVAL);
> +       }
> +
> +       ret = get_prioidx(&cs->prioidx);
> +       if (ret != 0) {
> +               printk(KERN_WARNING "No space in priority index array\n");
> +               kfree(cs);
> +               return ERR_PTR(ret);
> +       }
> +
> +       return &cs->css;
> +}
> +
> +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
> +{
> +       struct cgroup_netprio_state *cs;
> +       struct net_device *dev;
> +       struct netprio_map *map;
> +
> +       cs = cgrp_netprio_state(cgrp);
> +       rtnl_lock();
> +       for_each_netdev(&init_net, dev) {
> +               map = rcu_dereference(dev->priomap);

map = rtnl_dereference(dev->priomap)

> +               if (map)
> +                       map->priomap[cs->prioidx] = 0;
> +       }
> +       rtnl_unlock();
> +       put_prioidx(cs->prioidx);
> +       kfree(cs);
> +}
> +
> +static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
> +{
> +       return (u64)cgrp_netprio_state(cgrp)->prioidx;
> +}
> +
> +static int read_priomap(struct cgroup *cont, struct cftype *cft,
> +                       struct cgroup_map_cb *cb)
> +{
> +       struct net_device *dev;
> +       u32 prioidx = cgrp_netprio_state(cont)->prioidx;
> +       u32 priority;
> +       struct netprio_map *map;
> +
> +       rcu_read_lock();
> +
> +       for_each_netdev_rcu(&init_net, dev) {
> +               map = rcu_dereference(dev->priomap);
> +               priority = map ? map->priomap[prioidx] : 0;
> +               cb->fill(cb, dev->name, priority);
> +       }
> +       rcu_read_unlock();
> +       return 0;
> +}
> +

[...]

> +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
> +{
> +       return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
> +}
> +
> +static int netprio_device_event(struct notifier_block *unused,
> +                               unsigned long event, void *ptr)
> +{
> +       struct net_device *dev = ptr;
> +       struct netprio_map *old;
> +       u32 max_len = atomic_read(&max_prioidx);
> +
> +       old = rcu_dereference_protected(dev->priomap, 1);

This is protected because the rtnl lock is held here, so use:

old = rtnl_dereference(dev->priomap);

> +       /*
> +        * Note this is called with rtnl_lock held so we have update side
> +        * protection on our rcu assignments
> +        */
> +
> +       switch (event) {
> +
> +       case NETDEV_REGISTER:
> +               if (max_len)
> +                       extend_netdev_table(dev, max_len);
> +               break;
> +       case NETDEV_UNREGISTER:
> +               rcu_assign_pointer(dev->priomap, NULL);
> +               if (old)
> +                       kfree_rcu(old, rcu);
> +               break;
> +       }
> +       return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block netprio_device_notifier = {
> +       .notifier_call = netprio_device_event
> +};
> +

I can roll an update if you want, just let me know.
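
Roughly what I have in mind (untested sketch, only the dereference sites;
read_priomap() keeps rcu_dereference() since it runs under rcu_read_lock()):

        /* extend_netdev_table() - callers hold RTNL */
        old_priomap = rtnl_dereference(dev->priomap);

        /* update_netdev_tables() - between rtnl_lock()/rtnl_unlock() */
        map = rtnl_dereference(dev->priomap);

        /* cgrp_destroy() - between rtnl_lock()/rtnl_unlock() */
        map = rtnl_dereference(dev->priomap);

        /* netprio_device_event() - notifier chain runs with RTNL held */
        old = rtnl_dereference(dev->priomap);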

Thanks,
John
