lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 1 Dec 2009 05:34:37 +0530
From:	Divyesh Shah <dpshah@...gle.com>
To:	Vivek Goyal <vgoyal@...hat.com>
Cc:	linux-kernel@...r.kernel.org, jens.axboe@...cle.com,
	nauman@...gle.com, lizf@...fujitsu.com, ryov@...inux.co.jp,
	fernando@....ntt.co.jp, s-uchida@...jp.nec.com, taka@...inux.co.jp,
	guijianfeng@...fujitsu.com, jmoyer@...hat.com,
	righi.andrea@...il.com, m-ikeda@...jp.nec.com, czoccolo@...il.com,
	Alan.Brunelle@...com
Subject: Re: [PATCH 06/21] blkio: Introduce blkio controller cgroup interface

On Mon, Nov 30, 2009 at 8:29 AM, Vivek Goyal <vgoyal@...hat.com> wrote:
> o This is basic implementation of blkio controller cgroup interface. This is
>  the common interface visible to user space and should be used by different
>  IO control policies as we implement those.
>
> Signed-off-by: Vivek Goyal <vgoyal@...hat.com>
> ---
>  block/Kconfig                 |   13 +++
>  block/Kconfig.iosched         |    1 +
>  block/Makefile                |    1 +
>  block/blk-cgroup.c            |  177 +++++++++++++++++++++++++++++++++++++++++
>  block/blk-cgroup.h            |   58 +++++++++++++
>  include/linux/cgroup_subsys.h |    6 ++
>  include/linux/iocontext.h     |    4 +
>  7 files changed, 260 insertions(+), 0 deletions(-)
>  create mode 100644 block/blk-cgroup.c
>  create mode 100644 block/blk-cgroup.h
>
> diff --git a/block/Kconfig b/block/Kconfig
> index 9be0b56..6ba1a8e 100644
> --- a/block/Kconfig
> +++ b/block/Kconfig
> @@ -77,6 +77,19 @@ config BLK_DEV_INTEGRITY
>        T10/SCSI Data Integrity Field or the T13/ATA External Path
>        Protection.  If in doubt, say N.
>
> +config BLK_CGROUP
> +       bool
> +       depends on CGROUPS
> +       default n
> +       ---help---
> +       Generic block IO controller cgroup interface. This is the common
> +       cgroup interface which should be used by various IO controlling
> +       policies.
> +
> +       Currently, CFQ IO scheduler uses it to recognize task groups and
> +       control disk bandwidth allocation (proportional time slice allocation)
> +       to such task groups.
> +
>  endif # BLOCK
>
>  config BLOCK_COMPAT
> diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
> index 8bd1051..be0280d 100644
> --- a/block/Kconfig.iosched
> +++ b/block/Kconfig.iosched
> @@ -23,6 +23,7 @@ config IOSCHED_DEADLINE
>
>  config IOSCHED_CFQ
>        tristate "CFQ I/O scheduler"
> +       select BLK_CGROUP
>        default y
>        ---help---
>          The CFQ I/O scheduler tries to distribute bandwidth equally
> diff --git a/block/Makefile b/block/Makefile
> index 7914108..cb2d515 100644
> --- a/block/Makefile
> +++ b/block/Makefile
> @@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
>                        blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
>
>  obj-$(CONFIG_BLK_DEV_BSG)      += bsg.o
> +obj-$(CONFIG_BLK_CGROUP)       += blk-cgroup.o
>  obj-$(CONFIG_IOSCHED_NOOP)     += noop-iosched.o
>  obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
>  obj-$(CONFIG_IOSCHED_CFQ)      += cfq-iosched.o
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> new file mode 100644
> index 0000000..4f6afd7
> --- /dev/null
> +++ b/block/blk-cgroup.c
> @@ -0,0 +1,177 @@
> +/*
> + * Common Block IO controller cgroup interface
> + *
> + * Based on ideas and code from CFQ, CFS and BFQ:
> + * Copyright (C) 2003 Jens Axboe <axboe@...nel.dk>
> + *
> + * Copyright (C) 2008 Fabio Checconi <fabio@...dalf.sssup.it>
> + *                   Paolo Valente <paolo.valente@...more.it>
> + *
> + * Copyright (C) 2009 Vivek Goyal <vgoyal@...hat.com>
> + *                   Nauman Rafique <nauman@...gle.com>
> + */
> +#include <linux/ioprio.h>
> +#include "blk-cgroup.h"
> +
> +struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };

This should use BLKIO_WEIGHT_MAX, since 2*BLKIO_WEIGHT_DEFAULT is the same
as BLKIO_WEIGHT_MAX, unless there is a reason why you would want the value
to remain a multiple of the default weight rather than the max in case the
constants change later.

> +
> +struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
> +{
> +       return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
> +                           struct blkio_cgroup, css);
> +}
> +
> +void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> +                               struct blkio_group *blkg, void *key)
> +{
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&blkcg->lock, flags);
> +       rcu_assign_pointer(blkg->key, key);
> +       hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
> +       spin_unlock_irqrestore(&blkcg->lock, flags);
> +}
> +
> +int blkiocg_del_blkio_group(struct blkio_group *blkg)
> +{
> +       /* Implemented later */
> +       return 0;
> +}
> +
> +/* called under rcu_read_lock(). */
> +struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
> +{
> +       struct blkio_group *blkg;
> +       struct hlist_node *n;
> +       void *__key;
> +
> +       hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
> +               __key = blkg->key;
> +               if (__key == key)
> +                       return blkg;
> +       }
> +
> +       return NULL;
> +}
> +
> +#define SHOW_FUNCTION(__VAR)                                           \
> +static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup,               \
> +                                      struct cftype *cftype)           \
> +{                                                                      \
> +       struct blkio_cgroup *blkcg;                                     \
> +                                                                       \
> +       blkcg = cgroup_to_blkio_cgroup(cgroup);                         \
> +       return (u64)blkcg->__VAR;                                       \
> +}
> +
> +SHOW_FUNCTION(weight);
> +#undef SHOW_FUNCTION
> +
> +static int
> +blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
> +{
> +       struct blkio_cgroup *blkcg;
> +
> +       if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
> +               return -EINVAL;
> +
> +       blkcg = cgroup_to_blkio_cgroup(cgroup);
> +       blkcg->weight = (unsigned int)val;
> +       return 0;
> +}
> +
> +struct cftype blkio_files[] = {
> +       {
> +               .name = "weight",
> +               .read_u64 = blkiocg_weight_read,
> +               .write_u64 = blkiocg_weight_write,
> +       },
> +};
> +
> +static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
> +{
> +       return cgroup_add_files(cgroup, subsys, blkio_files,
> +                               ARRAY_SIZE(blkio_files));
> +}
> +
> +static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
> +{
> +       struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
> +
> +       free_css_id(&blkio_subsys, &blkcg->css);
> +       kfree(blkcg);
> +}
> +
> +static struct cgroup_subsys_state *
> +blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
> +{
> +       struct blkio_cgroup *blkcg, *parent_blkcg;
> +
> +       if (!cgroup->parent) {
> +               blkcg = &blkio_root_cgroup;
> +               goto done;
> +       }
> +
> +       /* Currently we do not support hierarchy deeper than two level (0,1) */
> +       parent_blkcg = cgroup_to_blkio_cgroup(cgroup->parent);
> +       if (css_depth(&parent_blkcg->css) > 0)
> +               return ERR_PTR(-EINVAL);
> +
> +       blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
> +       if (!blkcg)
> +               return ERR_PTR(-ENOMEM);
> +
> +       blkcg->weight = BLKIO_WEIGHT_DEFAULT;
> +done:
> +       spin_lock_init(&blkcg->lock);
> +       INIT_HLIST_HEAD(&blkcg->blkg_list);
> +
> +       return &blkcg->css;
> +}
> +
> +/*
> + * We cannot support shared io contexts, as we have no mean to support
> + * two tasks with the same ioc in two different groups without major rework
> + * of the main cic data structures.  For now we allow a task to change
> + * its cgroup only if it's the only owner of its ioc.
> + */
> +static int blkiocg_can_attach(struct cgroup_subsys *subsys,
> +                               struct cgroup *cgroup, struct task_struct *tsk,
> +                               bool threadgroup)
> +{
> +       struct io_context *ioc;
> +       int ret = 0;
> +
> +       /* task_lock() is needed to avoid races with exit_io_context() */
> +       task_lock(tsk);
> +       ioc = tsk->io_context;
> +       if (ioc && atomic_read(&ioc->nr_tasks) > 1)
> +               ret = -EINVAL;
> +       task_unlock(tsk);
> +
> +       return ret;
> +}
> +
> +static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
> +                               struct cgroup *prev, struct task_struct *tsk,
> +                               bool threadgroup)
> +{
> +       struct io_context *ioc;
> +
> +       task_lock(tsk);
> +       ioc = tsk->io_context;
> +       if (ioc)
> +               ioc->cgroup_changed = 1;
> +       task_unlock(tsk);
> +}
> +
> +struct cgroup_subsys blkio_subsys = {
> +       .name = "blkio",
> +       .create = blkiocg_create,
> +       .can_attach = blkiocg_can_attach,
> +       .attach = blkiocg_attach,
> +       .destroy = blkiocg_destroy,
> +       .populate = blkiocg_populate,
> +       .subsys_id = blkio_subsys_id,
> +       .use_id = 1,
> +};
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> new file mode 100644
> index 0000000..ba5703f
> --- /dev/null
> +++ b/block/blk-cgroup.h
> @@ -0,0 +1,58 @@
> +#ifndef _BLK_CGROUP_H
> +#define _BLK_CGROUP_H
> +/*
> + * Common Block IO controller cgroup interface
> + *
> + * Based on ideas and code from CFQ, CFS and BFQ:
> + * Copyright (C) 2003 Jens Axboe <axboe@...nel.dk>
> + *
> + * Copyright (C) 2008 Fabio Checconi <fabio@...dalf.sssup.it>
> + *                   Paolo Valente <paolo.valente@...more.it>
> + *
> + * Copyright (C) 2009 Vivek Goyal <vgoyal@...hat.com>
> + *                   Nauman Rafique <nauman@...gle.com>
> + */
> +
> +#include <linux/cgroup.h>
> +
> +struct blkio_cgroup {
> +       struct cgroup_subsys_state css;
> +       unsigned int weight;
> +       spinlock_t lock;
> +       struct hlist_head blkg_list;
> +};
> +
> +struct blkio_group {
> +       /* An rcu protected unique identifier for the group */
> +       void *key;
> +       struct hlist_node blkcg_node;
> +};
> +
> +#define BLKIO_WEIGHT_MIN       100
> +#define BLKIO_WEIGHT_MAX       1000
> +#define BLKIO_WEIGHT_DEFAULT   500
> +
> +#ifdef CONFIG_BLK_CGROUP
> +extern struct blkio_cgroup blkio_root_cgroup;
> +extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
> +extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> +                               struct blkio_group *blkg, void *key);
> +extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
> +extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
> +                                               void *key);
> +#else
> +static inline struct blkio_cgroup *
> +cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
> +
> +static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
> +                       struct blkio_group *blkg, void *key)
> +{
> +}
> +
> +static inline int
> +blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
> +
> +static inline struct blkio_group *
> +blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
> +#endif
> +#endif /* _BLK_CGROUP_H */
> diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
> index 9c8d31b..ccefff0 100644
> --- a/include/linux/cgroup_subsys.h
> +++ b/include/linux/cgroup_subsys.h
> @@ -60,3 +60,9 @@ SUBSYS(net_cls)
>  #endif
>
>  /* */
> +
> +#ifdef CONFIG_BLK_CGROUP
> +SUBSYS(blkio)
> +#endif
> +
> +/* */
> diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
> index eb73632..d61b0b8 100644
> --- a/include/linux/iocontext.h
> +++ b/include/linux/iocontext.h
> @@ -68,6 +68,10 @@ struct io_context {
>        unsigned short ioprio;
>        unsigned short ioprio_changed;
>
> +#ifdef CONFIG_BLK_CGROUP
> +       unsigned short cgroup_changed;
> +#endif
> +
>        /*
>         * For request batching
>         */
> --
> 1.6.2.5
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ