lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 3 Mar 2010 10:36:35 -0500
From:	Vivek Goyal <vgoyal@...hat.com>
To:	Gui Jianfeng <guijianfeng@...fujitsu.com>
Cc:	Jens Axboe <jens.axboe@...cle.com>,
	linux kernel mailing list <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 2/1] io-controller: Add a new interface "policy" for IO
	Controller

On Wed, Mar 03, 2010 at 02:21:11PM +0800, Gui Jianfeng wrote:
> Currently, IO Controller makes use of blkio.weight to assign weight for
> all devices. Here a new user interface "blkio.policy" is introduced to 
> assign different weights for different devices. blkio.weight becomes the
> default value for devices which are not configured by blkio.policy.
> 
> You can use the following format to assigned specific weight for a given
> device:
> #echo "major:minor weight" > blkio.policy
> 
> major:minor represents device number.
> 
> And you can remove a policy as following:
> #echo "major:minor 0" > blkio.policy
> 
> Signed-off-by: Gui Jianfeng <guijianfeng@...fujitsu.com>
> ---
>  block/blk-cgroup.c  |  248 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/blk-cgroup.h  |    9 ++
>  block/cfq-iosched.c |    2 +-
>  3 files changed, 258 insertions(+), 1 deletions(-)
> 
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index e7dbbaf..1fddb98 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -16,6 +16,7 @@
>  #include <linux/module.h>
>  #include <linux/err.h>
>  #include "blk-cgroup.h"
> +#include <linux/genhd.h>
>  
>  static DEFINE_SPINLOCK(blkio_list_lock);
>  static LIST_HEAD(blkio_list);
> @@ -23,6 +24,12 @@ static LIST_HEAD(blkio_list);
>  struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
>  EXPORT_SYMBOL_GPL(blkio_root_cgroup);
>  
> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
> +				      struct io_policy_node *pn);
					     ^^^^^^^^^^
How about blkio_policy_node?

So this structure will represent per cgroup per device policy as specified
by user. We can re-use the same structure if there are more per cgroup
per device policies down the line. 

> +static inline void policy_delete_node(struct io_policy_node *pn);
> +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
> +						 dev_t dev);

Not very sure about it but may be a good idea to prefix "blkio_" or
"blkcg_" before all the functions you are defining. Just some uniformity in
function names like cfq.

> +
>  bool blkiocg_css_tryget(struct blkio_cgroup *blkcg)

You might want to rebase it on Jen's for-linus branch as blkiocg_css_tryget()
is gone in that branch

Apart from above minor nits, this patch looks good to me. Will do some
testing on it.

Thanks
Vivek

>  {
>  	if (!css_tryget(&blkcg->css))
> @@ -142,6 +149,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>  	struct blkio_group *blkg;
>  	struct hlist_node *n;
>  	struct blkio_policy_type *blkiop;
> +	struct io_policy_node *pn;
>  
>  	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
>  		return -EINVAL;
> @@ -150,7 +158,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
>  	spin_lock(&blkio_list_lock);
>  	spin_lock_irq(&blkcg->lock);
>  	blkcg->weight = (unsigned int)val;
> +
>  	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> +		pn = policy_search_node(blkcg, blkg->dev);
> +
> +		if (pn)
> +			continue;

Ok, so if we have specified a weight for a device, then blkio.weight
update does not affect the weight on that device. Makes sense.

> +
>  		list_for_each_entry(blkiop, &blkio_list, list)
>  			blkiop->ops.blkio_update_group_weight_fn(blkg,
>  					blkcg->weight);
> @@ -199,8 +213,235 @@ void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
>  EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
>  #endif
>  
> +static int check_dev_num(dev_t dev)
> +{
> +	int part = 0;
> +	struct gendisk *disk;
> +
> +	disk = get_gendisk(dev, &part);
> +
> +	if (!disk || part)
> +		return -ENODEV;
> +
> +	return 0;
> +}
> +
> +static int policy_parse_and_set(char *buf, struct io_policy_node *newpn)
> +{
> +	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
> +	int ret;
> +	unsigned long major, minor, temp;
> +	int i = 0;
> +	dev_t dev;
> +
> +	memset(s, 0, sizeof(s));
> +
> +	while ((p = strsep(&buf, " ")) != NULL) {
> +		if (!*p)
> +			continue;
> +
> +		s[i++] = p;
> +
> +		/* Prevent from inputing too many things */
> +		if (i == 3)
> +			break;
> +	}
> +
> +	if (i != 2)
> +		return -EINVAL;
> +
> +	p = strsep(&s[0], ":");
> +
> +	if (p != NULL)
> +		major_s = p;
> +	else
> +		return -EINVAL;
> +
> +	minor_s = s[0];
> +
> +	if (!minor_s)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(major_s, 10, &major);
> +	if (ret)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(minor_s, 10, &minor);
> +	if (ret)
> +		return -EINVAL;
> +
> +	dev = MKDEV(major, minor);
> +
> +	ret = check_dev_num(dev);
> +	if (ret)
> +		return ret;
> +
> +	newpn->dev = dev;
> +
> +	if (s[1] == NULL)
> +		return -EINVAL;
> +
> +	ret = strict_strtoul(s[1], 10, &temp);
> +	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
> +	    temp > BLKIO_WEIGHT_MAX)
> +		return -EINVAL;
> +
> +	newpn->weight =  temp;
> +
> +	return 0;
> +}
> +
> +unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> +				    dev_t dev)
> +{
> +	struct io_policy_node *pn;
> +
> +	pn = policy_search_node(blkcg, dev);
> +
> +	if (pn)
> +		return pn->weight;
> +	else
> +		return blkcg->weight;
> +}
> +EXPORT_SYMBOL_GPL(blkcg_get_weight);
> +
> +static inline void policy_insert_node(struct blkio_cgroup *blkcg,
> +				      struct io_policy_node *pn)
> +{
> +	list_add(&pn->node, &blkcg->policy_list);
> +}
> +
> +/* Must be called with blkcg->lock held */
> +static inline void policy_delete_node(struct io_policy_node *pn)
> +{
> +	list_del(&pn->node);
> +}
> +
> +/* Must be called with blkcg->lock held */
> +static struct io_policy_node *policy_search_node(const struct blkio_cgroup *blkcg,
> +						 dev_t dev)
> +{
> +	struct io_policy_node *pn;
> +
> +	if (list_empty(&blkcg->policy_list))
> +		return NULL;
> +
> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> +		if (pn->dev == dev)
> +			return pn;
> +	}
> +
> +	return NULL;
> +}
> +
> +
> +static int blkiocg_policy_write(struct cgroup *cgrp, struct cftype *cft,
> +				const char *buffer)
> +{
> +	int ret = 0;
> +	char *buf;
> +	struct io_policy_node *newpn, *pn;
> +	struct blkio_cgroup *blkcg;
> +	struct blkio_group *blkg;
> +	int keep_newpn = 0;
> +	struct hlist_node *n;
> +	struct blkio_policy_type *blkiop;
> +
> +	buf = kstrdup(buffer, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +
> +	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
> +	if (!newpn) {
> +		ret = -ENOMEM;
> +		goto free_buf;
> +	}
> +
> +	ret = policy_parse_and_set(buf, newpn);
> +	if (ret)
> +		goto free_newpn;
> +
> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> +
> +	spin_lock_irq(&blkcg->lock);
> +
> +	pn = policy_search_node(blkcg, newpn->dev);
> +	if (!pn) {
> +		if (newpn->weight != 0) {
> +			policy_insert_node(blkcg, newpn);
> +			keep_newpn = 1;
> +		}
> +		spin_unlock_irq(&blkcg->lock);
> +		goto update_io_group;
> +	}
> +
> +	if (newpn->weight == 0) {
> +		/* weight == 0 means deleteing a policy */
> +		policy_delete_node(pn);
> +		spin_unlock_irq(&blkcg->lock);
> +		goto update_io_group;
> +	}
> +	spin_unlock_irq(&blkcg->lock);
> +
> +	pn->weight = newpn->weight;
> +
> +update_io_group:
> +	/* update weight for each cfqg */
> +	spin_lock(&blkio_list_lock);
> +	spin_lock_irq(&blkcg->lock);
> +
> +	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
> +		if (newpn->dev == blkg->dev) {
> +			list_for_each_entry(blkiop, &blkio_list, list)
> +				blkiop->ops.blkio_update_group_weight_fn(blkg,
> +						newpn->weight ?
> +						newpn->weight :
> +						blkcg->weight);
> +		}
> +	}
> +
> +	spin_unlock_irq(&blkcg->lock);
> +	spin_unlock(&blkio_list_lock);
> +
> +free_newpn:
> +	if (!keep_newpn)
> +		kfree(newpn);
> +free_buf:
> +	kfree(buf);
> +	return ret;
> +}
> +
> +static int blkiocg_policy_read(struct cgroup *cgrp, struct cftype *cft,
> +			       struct seq_file *m)
> +{
> +	struct blkio_cgroup *blkcg;
> +	struct io_policy_node *pn;
> +
> +	blkcg = cgroup_to_blkio_cgroup(cgrp);
> +
> +	if (list_empty(&blkcg->policy_list))
> +		goto out;
> +
> +	seq_printf(m, "dev\tweight\n");
> +
> +	spin_lock_irq(&blkcg->lock);
> +	list_for_each_entry(pn, &blkcg->policy_list, node) {
> +		seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
> +			   MINOR(pn->dev), pn->weight);
> +	}
> +	spin_unlock_irq(&blkcg->lock);
> +out:
> +	return 0;
> +}
> +
>  struct cftype blkio_files[] = {
>  	{
> +		.name = "policy",
> +		.read_seq_string = blkiocg_policy_read,
> +		.write_string = blkiocg_policy_write,
> +		.max_write_len = 256,
> +	},
> +	{
>  		.name = "weight",
>  		.read_u64 = blkiocg_weight_read,
>  		.write_u64 = blkiocg_weight_write,
> @@ -234,6 +475,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
>  	struct blkio_group *blkg;
>  	void *key;
>  	struct blkio_policy_type *blkiop;
> +	struct io_policy_node *pn, *pntmp;
>  
>  	rcu_read_lock();
>  remove_entry:
> @@ -264,7 +506,12 @@ remove_entry:
>  		blkiop->ops.blkio_unlink_group_fn(key, blkg);
>  	spin_unlock(&blkio_list_lock);
>  	goto remove_entry;
> +
>  done:
> +	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
> +		policy_delete_node(pn);
> +		kfree(pn);
> +	}
>  	free_css_id(&blkio_subsys, &blkcg->css);
>  	rcu_read_unlock();
>  	kfree(blkcg);
> @@ -294,6 +541,7 @@ done:
>  	spin_lock_init(&blkcg->lock);
>  	INIT_HLIST_HEAD(&blkcg->blkg_list);
>  
> +	INIT_LIST_HEAD(&blkcg->policy_list);
>  	return &blkcg->css;
>  }
>  
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> index 4d316df..0551aa1 100644
> --- a/block/blk-cgroup.h
> +++ b/block/blk-cgroup.h
> @@ -22,6 +22,7 @@ struct blkio_cgroup {
>  	unsigned int weight;
>  	spinlock_t lock;
>  	struct hlist_head blkg_list;
> +	struct list_head policy_list; /* list of io_policy_node */
>  };
>  
>  struct blkio_group {
> @@ -43,8 +44,16 @@ struct blkio_group {
>  	unsigned long sectors;
>  };
>  
> +struct io_policy_node {
> +	struct list_head node;
> +	dev_t dev;
> +	unsigned int weight;
> +};
> +
>  extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg);
>  extern void blkiocg_css_put(struct blkio_cgroup *blkcg);
> +extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
> +				     dev_t dev);
>  
>  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index 023f4e6..3d0dea1 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -963,7 +963,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>  	if (!cfqg)
>  		goto done;
>  
> -	cfqg->weight = blkcg->weight;
>  	for_each_cfqg_st(cfqg, i, j, st)
>  		*st = CFQ_RB_ROOT;
>  	RB_CLEAR_NODE(&cfqg->rb_node);
> @@ -980,6 +979,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>  	sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
>  	blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
>  					MKDEV(major, minor));
> +	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
>  
>  	/* Add group on cfqd list */
>  	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
> -- 
> 1.5.4.rc3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ