lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 19 Jan 2009 17:57:17 -0800
From:	Paul Menage <menage@...gle.com>
To:	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
Cc:	"linux-mm@...ck.org" <linux-mm@...ck.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	"lizf@...fujitsu.com" <lizf@...fujitsu.com>,
	"balbir@...ux.vnet.ibm.com" <balbir@...ux.vnet.ibm.com>,
	"nishimura@....nes.nec.co.jp" <nishimura@....nes.nec.co.jp>,
	"akpm@...ux-foundation.org" <akpm@...ux-foundation.org>
Subject: Re: [PATCH 1/4] cgroup: add CSS ID

On Thu, Jan 15, 2009 at 2:25 AM, KAMEZAWA Hiroyuki
<kamezawa.hiroyu@...fujitsu.com> wrote:
>
> From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>
>
> Patch for Per-CSS(Cgroup Subsys State) ID and private hierarchy code.
>
> This patch attaches a unique ID to each css and provides the following.
>
>
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>

Acked-by: Paul Menage <menage@...gle.com>

> ---
> Index: mmotm-2.6.29-Jan14/include/linux/cgroup.h
> ===================================================================
> --- mmotm-2.6.29-Jan14.orig/include/linux/cgroup.h
> +++ mmotm-2.6.29-Jan14/include/linux/cgroup.h
> @@ -15,6 +15,7 @@
>  #include <linux/cgroupstats.h>
>  #include <linux/prio_heap.h>
>  #include <linux/rwsem.h>
> +#include <linux/idr.h>
>
>  #ifdef CONFIG_CGROUPS
>
> @@ -22,6 +23,7 @@ struct cgroupfs_root;
>  struct cgroup_subsys;
>  struct inode;
>  struct cgroup;
> +struct css_id;
>
>  extern int cgroup_init_early(void);
>  extern int cgroup_init(void);
> @@ -59,6 +61,8 @@ struct cgroup_subsys_state {
>        atomic_t refcnt;
>
>        unsigned long flags;
> +       /* ID for this css, if possible */
> +       struct css_id *id;
>  };
>
>  /* bits in struct cgroup_subsys_state flags field */
> @@ -363,6 +367,11 @@ struct cgroup_subsys {
>        int active;
>        int disabled;
>        int early_init;
> +       /*
> +        * True if this subsys uses ID. ID is not available before cgroup_init()
> +        * (not available in early_init time.)
> +        */
> +       bool use_id;
>  #define MAX_CGROUP_TYPE_NAMELEN 32
>        const char *name;
>
> @@ -384,6 +393,9 @@ struct cgroup_subsys {
>         */
>        struct cgroupfs_root *root;
>        struct list_head sibling;
> +       /* used when use_id == true */
> +       struct idr idr;
> +       spinlock_t id_lock;
>  };
>
>  #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
> @@ -437,6 +449,44 @@ void cgroup_iter_end(struct cgroup *cgrp
>  int cgroup_scan_tasks(struct cgroup_scanner *scan);
>  int cgroup_attach_task(struct cgroup *, struct task_struct *);
>
> +/*
> + * A CSS ID identifies a cgroup_subsys_state within a subsystem. It is only
> + * available if cgroup_subsys.use_id == true, and can be used for lookup and
> + * scanning. A CSS ID is assigned automatically at cgroup creation and is
> + * removed when the subsystem calls free_css_id(), because the lifetime of a
> + * cgroup_subsys_state is managed by the subsystem.
> + *
> + * The lookup and scanning functions must be called under rcu_read_lock().
> + * Taking cgroup_mutex()/hierarchy_mutex() is not necessary for these calls,
> + * but the css they return may be "not populated yet" or "being destroyed".
> + * The caller should check the status of the css and its cgroup.
> + */
> +
> +/*
> + * Typically Called at ->destroy(), or somewhere the subsys frees
> + * cgroup_subsys_state.
> + */
> +void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
> +
> +/* Find a cgroup_subsys_state which has given ID */
> +
> +struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
> +
> +/*
> + * Get the css whose ID is greater than or equal to id in the tree under root.
> + * Returns a cgroup_subsys_state, or NULL if there is none.
> + */
> +struct cgroup_subsys_state *css_get_next(struct cgroup_subsys *ss, int id,
> +               struct cgroup_subsys_state *root, int *foundid);
> +
> +/* Returns true if root is ancestor of cg */
> +bool css_is_ancestor(struct cgroup_subsys_state *cg,
> +                    struct cgroup_subsys_state *root);
> +
> +/* Get id and depth of css */
> +unsigned short css_id(struct cgroup_subsys_state *css);
> +unsigned short css_depth(struct cgroup_subsys_state *css);
> +
>  #else /* !CONFIG_CGROUPS */
>
>  static inline int cgroup_init_early(void) { return 0; }
> Index: mmotm-2.6.29-Jan14/kernel/cgroup.c
> ===================================================================
> --- mmotm-2.6.29-Jan14.orig/kernel/cgroup.c
> +++ mmotm-2.6.29-Jan14/kernel/cgroup.c
> @@ -185,6 +185,8 @@ struct cg_cgroup_link {
>  static struct css_set init_css_set;
>  static struct cg_cgroup_link init_css_set_link;
>
> +static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
> +
>  /* css_set_lock protects the list of css_set objects, and the
>  * chain of tasks off each css_set.  Nests outside task->alloc_lock
>  * due to cgroup_iter_start() */
> @@ -567,6 +569,9 @@ static struct backing_dev_info cgroup_ba
>        .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK,
>  };
>
> +static int alloc_css_id(struct cgroup_subsys *ss,
> +                       struct cgroup *parent, struct cgroup *child);
> +
>  static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
>  {
>        struct inode *inode = new_inode(sb);
> @@ -2335,6 +2340,7 @@ static void init_cgroup_css(struct cgrou
>        css->cgroup = cgrp;
>        atomic_set(&css->refcnt, 1);
>        css->flags = 0;
> +       css->id = NULL;
>        if (cgrp == dummytop)
>                set_bit(CSS_ROOT, &css->flags);
>        BUG_ON(cgrp->subsys[ss->subsys_id]);
> @@ -2410,6 +2416,10 @@ static long cgroup_create(struct cgroup
>                        goto err_destroy;
>                }
>                init_cgroup_css(css, ss, cgrp);
> +               if (ss->use_id)
> +                       if (alloc_css_id(ss, parent, cgrp))
> +                               goto err_destroy;
> +               /* At error, ->destroy() callback has to free assigned ID. */
>        }
>
>        cgroup_lock_hierarchy(root);
> @@ -2699,6 +2709,8 @@ int __init cgroup_init(void)
>                struct cgroup_subsys *ss = subsys[i];
>                if (!ss->early_init)
>                        cgroup_init_subsys(ss);
> +               if (ss->use_id)
> +                       cgroup_subsys_init_idr(ss);
>        }
>
>        /* Add init_css_set to the hash table */
> @@ -3232,3 +3244,259 @@ static int __init cgroup_disable(char *s
>        return 1;
>  }
>  __setup("cgroup_disable=", cgroup_disable);
> +
> +/*
> + * CSS ID -- ID per subsys's Cgroup Subsys State(CSS).
> + */
> +struct css_id {
> +       /*
> +        * The css to which this ID points. If the cgroup is removed, this
> +        * will be NULL. This pointer is expected to be RCU-safe because
> +        * destroy() is called after synchronize_rcu(). For safe use, however,
> +        * css_is_removed()/css_tryget() should be used to avoid races.
> +        */
> +       struct cgroup_subsys_state *css;
> +       /*
> +        * ID of this css.
> +        */
> +       unsigned short id;
> +       /*
> +        * Depth in hierarchy which this ID belongs to.
> +        */
> +       unsigned short depth;
> +       /*
> +        * ID is freed by RCU. (and lookup routine is RCU safe.)
> +        */
> +       struct rcu_head rcu_head;
> +       /*
> +        * IDs of the ancestors of this css (root first), then its own ID.
> +        */
> +       unsigned short stack[0]; /* Array of Length (depth+1) */
> +};
> +#define CSS_ID_MAX     (65535)
> +
> +/*
> + * To get ID other than 0, this should be called when !cgroup_is_removed().
> + */
> +unsigned short css_id(struct cgroup_subsys_state *css)
> +{
> +       struct css_id *cssid = rcu_dereference(css->id);
> +
> +       if (cssid)
> +               return cssid->id;
> +       return 0;
> +}
> +
> +unsigned short css_depth(struct cgroup_subsys_state *css)
> +{
> +       struct css_id *cssid = rcu_dereference(css->id);
> +
> +       if (cssid)
> +               return cssid->depth;
> +       return 0;
> +}
> +
> +bool css_is_ancestor(struct cgroup_subsys_state *child,
> +                   struct cgroup_subsys_state *root)
> +{
> +       struct css_id *child_id = rcu_dereference(child->id);
> +       struct css_id *root_id = rcu_dereference(root->id);
> +
> +       if (!child_id || !root_id || (child_id->depth < root_id->depth))
> +               return false;
> +       return child_id->stack[root_id->depth] == root_id->id;
> +}
> +
> +static void __free_css_id_cb(struct rcu_head *head)
> +{
> +       struct css_id *id;
> +
> +       id = container_of(head, struct css_id, rcu_head);
> +       kfree(id);
> +}
> +
> +void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
> +{
> +       struct css_id *id = css->id;
> +       /* When this is called before css_id initialization, id can be NULL */
> +       if (!id)
> +               return;
> +
> +       BUG_ON(!ss->use_id);
> +
> +       rcu_assign_pointer(id->css, NULL);
> +       rcu_assign_pointer(css->id, NULL);
> +       spin_lock(&ss->id_lock);
> +       idr_remove(&ss->idr, id->id);
> +       spin_unlock(&ss->id_lock);
> +       call_rcu(&id->rcu_head, __free_css_id_cb);
> +}
> +
> +/*
> + * This is called by init or create(), so calls to this function are
> + * always serialized (by cgroup_mutex() at create()).
> + */
> +
> +static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
> +{
> +       struct css_id *newid;
> +       int myid, error, size;
> +
> +       BUG_ON(!ss->use_id);
> +
> +       size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
> +       newid = kzalloc(size, GFP_KERNEL);
> +       if (!newid)
> +               return ERR_PTR(-ENOMEM);
> +       /* get id */
> +       if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
> +               error = -ENOMEM;
> +               goto err_out;
> +       }
> +       spin_lock(&ss->id_lock);
> +       /* Don't use 0. allocates an ID of 1-65535 */
> +       error = idr_get_new_above(&ss->idr, newid, 1, &myid);
> +       spin_unlock(&ss->id_lock);
> +
> +       /* Returns an error when there is no free space for a new ID. */
> +       if (error) {
> +               error = -ENOSPC;
> +               goto err_out;
> +       }
> +       if (myid > CSS_ID_MAX)
> +               goto remove_idr;
> +
> +       newid->id = myid;
> +       newid->depth = depth;
> +       return newid;
> +remove_idr:
> +       error = -ENOSPC;
> +       spin_lock(&ss->id_lock);
> +       idr_remove(&ss->idr, myid);
> +       spin_unlock(&ss->id_lock);
> +err_out:
> +       kfree(newid);
> +       return ERR_PTR(error);
> +
> +}
> +
> +static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss)
> +{
> +       struct css_id *newid;
> +       struct cgroup_subsys_state *rootcss;
> +
> +       spin_lock_init(&ss->id_lock);
> +       idr_init(&ss->idr);
> +
> +       rootcss = init_css_set.subsys[ss->subsys_id];
> +       newid = get_new_cssid(ss, 0);
> +       if (IS_ERR(newid))
> +               return PTR_ERR(newid);
> +
> +       newid->stack[0] = newid->id;
> +       newid->css = rootcss;
> +       rootcss->id = newid;
> +       return 0;
> +}
> +
> +static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
> +                       struct cgroup *child)
> +{
> +       int subsys_id, i, depth = 0;
> +       struct cgroup_subsys_state *parent_css, *child_css;
> +       struct css_id *child_id, *parent_id = NULL;
> +
> +       subsys_id = ss->subsys_id;
> +       parent_css = parent->subsys[subsys_id];
> +       child_css = child->subsys[subsys_id];
> +       depth = css_depth(parent_css) + 1;
> +       parent_id = parent_css->id;
> +
> +       child_id = get_new_cssid(ss, depth);
> +       if (IS_ERR(child_id))
> +               return PTR_ERR(child_id);
> +
> +       for (i = 0; i < depth; i++)
> +               child_id->stack[i] = parent_id->stack[i];
> +       child_id->stack[depth] = child_id->id;
> +
> +       rcu_assign_pointer(child_id->css, child_css);
> +       rcu_assign_pointer(child_css->id, child_id);
> +
> +       return 0;
> +}
> +
> +/**
> + * css_lookup - lookup css by id
> + * @ss: cgroup subsys to be looked into.
> + * @id: the id
> + *
> + * Returns pointer to cgroup_subsys_state if there is valid one with id.
> + * NULL if not. Should be called under rcu_read_lock()
> + */
> +struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
> +{
> +       struct css_id *cssid = NULL;
> +
> +       BUG_ON(!ss->use_id);
> +       cssid = idr_find(&ss->idr, id);
> +
> +       if (unlikely(!cssid))
> +               return NULL;
> +
> +       return rcu_dereference(cssid->css);
> +}
> +
> +/**
> + * css_get_next - lookup next cgroup under specified hierarchy.
> + * @ss: pointer to subsystem
> + * @id: current position of iteration.
> + * @root: pointer to css. search tree under this.
> + * @foundid: position of found object.
> + *
> + * Search for the next css under the hierarchy of @root. Calling under
> + * rcu_read_lock() is necessary. Returns NULL if it reaches the end.
> + */
> +struct cgroup_subsys_state *
> +css_get_next(struct cgroup_subsys *ss, int id,
> +            struct cgroup_subsys_state *root, int *foundid)
> +{
> +       struct cgroup_subsys_state *ret = NULL;
> +       struct css_id *tmp;
> +       int tmpid;
> +       int rootid = css_id(root);
> +       int depth = css_depth(root);
> +
> +       if (!rootid)
> +               return NULL;
> +
> +       BUG_ON(!ss->use_id);
> +       rcu_read_lock();
> +       /* fill start point for scan */
> +       tmpid = id;
> +       while (1) {
> +               /*
> +                * scan next entry from bitmap(tree), tmpid is updated after
> +                * idr_get_next().
> +                */
> +               spin_lock(&ss->id_lock);
> +               tmp = idr_get_next(&ss->idr, &tmpid);
> +               spin_unlock(&ss->id_lock);
> +
> +               if (!tmp)
> +                       break;
> +               if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
> +                       ret = rcu_dereference(tmp->css);
> +                       if (ret) {
> +                               *foundid = tmpid;
> +                               break;
> +                       }
> +               }
> +               /* continue to scan from next id */
> +               tmpid = tmpid + 1;
> +       }
> +
> +       rcu_read_unlock();
> +       return ret;
> +}
> +
> Index: mmotm-2.6.29-Jan14/include/linux/idr.h
> ===================================================================
> --- mmotm-2.6.29-Jan14.orig/include/linux/idr.h
> +++ mmotm-2.6.29-Jan14/include/linux/idr.h
> @@ -106,6 +106,7 @@ int idr_get_new(struct idr *idp, void *p
>  int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
>  int idr_for_each(struct idr *idp,
>                 int (*fn)(int id, void *p, void *data), void *data);
> +void *idr_get_next(struct idr *idp, int *nextid);
>  void *idr_replace(struct idr *idp, void *ptr, int id);
>  void idr_remove(struct idr *idp, int id);
>  void idr_remove_all(struct idr *idp);
> Index: mmotm-2.6.29-Jan14/lib/idr.c
> ===================================================================
> --- mmotm-2.6.29-Jan14.orig/lib/idr.c
> +++ mmotm-2.6.29-Jan14/lib/idr.c
> @@ -579,6 +579,52 @@ int idr_for_each(struct idr *idp,
>  EXPORT_SYMBOL(idr_for_each);
>
>  /**
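> + * idr_get_next - look up the first object with id at or after a given id.
> + * @idp: idr handle
> + * @nextidp: pointer to lookup key; updated to the id of the found object
> + *
> + * Returns a pointer to the registered object with the smallest id greater
> + * than or equal to *@nextidp, or NULL if there is none.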
> + */
> +
> +void *idr_get_next(struct idr *idp, int *nextidp)
> +{
> +       struct idr_layer *p, *pa[MAX_LEVEL];
> +       struct idr_layer **paa = &pa[0];
> +       int id = *nextidp;
> +       int n, max;
> +
> +       /* find first ent */
> +       n = idp->layers * IDR_BITS;
> +       max = 1 << n;
> +       p = rcu_dereference(idp->top);
> +       if (!p)
> +               return NULL;
> +
> +       while (id < max) {
> +               while (n > 0 && p) {
> +                       n -= IDR_BITS;
> +                       *paa++ = p;
> +                       p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
> +               }
> +
> +               if (p) {
> +                       *nextidp = id;
> +                       return p;
> +               }
> +
> +               id += 1 << n;
> +               while (n < fls(id)) {
> +                       n += IDR_BITS;
> +                       p = *--paa;
> +               }
> +       }
> +       return NULL;
> +}
> +
> +
> +
> +/**
>  * idr_replace - replace pointer for given id
>  * @idp: idr handle
>  * @ptr: pointer you want associated with the id
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ