lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 22 Mar 2011 18:03:09 +0800
From:	Gui Jianfeng <guijianfeng@...fujitsu.com>
To:	Chad Talbott <ctalbott@...gle.com>
CC:	jaxboe@...ionio.com, vgoyal@...hat.com,
	linux-kernel@...r.kernel.org, mrubin@...gle.com,
	teravest@...gle.com
Subject: Re: [PATCH 1/3] cfq-iosched: Fair cross-group preemption (interface
 and documentation)

Chad Talbott wrote:
> blkio.class is a new interface that allows you to specify a cgroup as
> requiring low-latency service on a specific block device.  This patch
> includes only the interface and documentation.
> 
> Signed-off-by: Chad Talbott <ctalbott@...gle.com>
> ---
>  Documentation/cgroups/blkio-controller.txt |   16 ++++
>  block/blk-cgroup.c                         |  121 +++++++++++++++++++++-------
>  block/blk-cgroup.h                         |   36 +++++++--
>  block/cfq-iosched.c                        |   12 +++-
>  4 files changed, 145 insertions(+), 40 deletions(-)
> 
> diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
> index 4ed7b5c..7acf9d2 100644
> --- a/Documentation/cgroups/blkio-controller.txt
> +++ b/Documentation/cgroups/blkio-controller.txt
> @@ -169,6 +169,22 @@ Proportional weight policy files
>  	  dev     weight
>  	  8:16    300
>  
> +- blkio.class
> +	- Specifies the per-cgroup, per-device service class.  There

Hi Chad,

Do we need to implement a globle blkio.class interface as well as
a device specific one something like "blkio.class_device"? So a user
doesn't need to check every device number if he want to specify a
global value.

Thanks,
Gui

> +	  are currently two service classes: real-time (1) and
> +	  best-effort (2).  By default, all groups start as
> +	  best-effort.
> +
> +	  #echo dev_maj:dev_minor class > /path/to/cgroup/blkio.class
> +	  Configure class=real-time on /dev/sdb (8:16) in this cgroup
> +	  # echo '8:16 1' > blkio.class
> +	  # cat blkio.class
> +	  8:16    1
> +
> +	  A real-time group can preempt a best-effort group, only as
> +	  long as it's within its assigned weight; i.e. fairness is
> +	  preserved.
> +
>  - blkio.time
>  	- disk time allocated to cgroup per device in milliseconds. First
>  	  two fields specify the major and minor number of the device and
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index 455768a..f2c553e 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -129,6 +129,21 @@ blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
>  	}
>  }
>  
> +static inline void
> +blkio_update_group_class(struct blkio_group *blkg, unsigned int class)
> +{
> +	struct blkio_policy_type *blkiop;
> +
> +	list_for_each_entry(blkiop, &blkio_list, list) {
> +		/* If this policy does not own the blkg, do not send updates */
> +		if (blkiop->plid != blkg->plid)
> +			continue;
> +		if (blkiop->ops.blkio_update_group_class_fn)
> +			blkiop->ops.blkio_update_group_class_fn(blkg->key,
> +								blkg, class);
> +	}
> +}
> +
>  static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
>  				int fileid)
>  {
> @@ -708,15 +723,29 @@ static int blkio_policy_parse_and_set(char *buf,
>  
>  	switch (plid) {
>  	case BLKIO_POLICY_PROP:
> -		ret = strict_strtoul(s[1], 10, &temp);
> -		if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
> -			temp > BLKIO_WEIGHT_MAX)
> -			return -EINVAL;
> -
> -		newpn->plid = plid;
> -		newpn->fileid = fileid;
> -		newpn->val.weight = temp;
> -		break;
> +		switch (fileid) {
> +		case BLKIO_PROP_weight_device:
> +			ret = strict_strtoul(s[1], 10, &temp);
> +			if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
> +			    temp > BLKIO_WEIGHT_MAX)
> +				return -EINVAL;
> +
> +			newpn->plid = plid;
> +			newpn->fileid = fileid;
> +			newpn->val.prop.weight = temp;
> +			break;
> +		case BLKIO_PROP_class:
> +			ret = strict_strtoul(s[1], 10, &temp);
> +			if (ret ||
> +			    temp < BLKIO_RT_CLASS ||
> +			    temp > BLKIO_BE_CLASS)
> +				return -EINVAL;
> +
> +			newpn->plid = plid;
> +			newpn->fileid = fileid;
> +			newpn->val.prop.class = temp;
> +			break;
> +		}
>  	case BLKIO_POLICY_THROTL:
>  		switch(fileid) {
>  		case BLKIO_THROTL_read_bps_device:
> @@ -727,7 +756,7 @@ static int blkio_policy_parse_and_set(char *buf,
>  
>  			newpn->plid = plid;
>  			newpn->fileid = fileid;
> -			newpn->val.bps = bps;
> +			newpn->val.throtl.bps = bps;
>  			break;
>  		case BLKIO_THROTL_read_iops_device:
>  		case BLKIO_THROTL_write_iops_device:
> @@ -740,7 +769,7 @@ static int blkio_policy_parse_and_set(char *buf,
>  
>  			newpn->plid = plid;
>  			newpn->fileid = fileid;
> -			newpn->val.iops = (unsigned int)iops;
> +			newpn->val.throtl.iops = (unsigned int)iops;
>  			break;
>  		}
>  		break;
> @@ -759,20 +788,34 @@ unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
>  	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
>  				BLKIO_PROP_weight_device);
>  	if (pn)
> -		return pn->val.weight;
> +		return pn->val.prop.weight;
>  	else
>  		return blkcg->weight;
>  }
>  EXPORT_SYMBOL_GPL(blkcg_get_weight);
>  
> -uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
> +enum blkcg_class blkcg_get_class(struct blkio_cgroup *blkcg,
> +			     dev_t dev)
>  {
>  	struct blkio_policy_node *pn;
> +	enum blkcg_class class = BLKIO_DEFAULT_CLASS;
> +
> +	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
> +				BLKIO_PROP_class);
> +	if (pn)
> +		class = pn->val.prop.class;
>  
> +	return class;
> +}
> +EXPORT_SYMBOL_GPL(blkcg_get_class);
> +
> +uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
> +{
> +	struct blkio_policy_node *pn;
>  	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
>  				BLKIO_THROTL_read_bps_device);
>  	if (pn)
> -		return pn->val.bps;
> +		return pn->val.throtl.bps;
>  	else
>  		return -1;
>  }
> @@ -783,7 +826,7 @@ uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
>  	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
>  				BLKIO_THROTL_write_bps_device);
>  	if (pn)
> -		return pn->val.bps;
> +		return pn->val.throtl.bps;
>  	else
>  		return -1;
>  }
> @@ -795,7 +838,7 @@ unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
>  	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
>  				BLKIO_THROTL_read_iops_device);
>  	if (pn)
> -		return pn->val.iops;
> +		return pn->val.throtl.iops;
>  	else
>  		return -1;
>  }
> @@ -806,7 +849,7 @@ unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
>  	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
>  				BLKIO_THROTL_write_iops_device);
>  	if (pn)
> -		return pn->val.iops;
> +		return pn->val.throtl.iops;
>  	else
>  		return -1;
>  }
> @@ -816,19 +859,19 @@ static bool blkio_delete_rule_command(struct blkio_policy_node *pn)
>  {
>  	switch(pn->plid) {
>  	case BLKIO_POLICY_PROP:
> -		if (pn->val.weight == 0)
> +		if (pn->val.prop.weight == 0)
>  			return 1;
>  		break;
>  	case BLKIO_POLICY_THROTL:
>  		switch(pn->fileid) {
>  		case BLKIO_THROTL_read_bps_device:
>  		case BLKIO_THROTL_write_bps_device:
> -			if (pn->val.bps == 0)
> +			if (pn->val.throtl.bps == 0)
>  				return 1;
>  			break;
>  		case BLKIO_THROTL_read_iops_device:
>  		case BLKIO_THROTL_write_iops_device:
> -			if (pn->val.iops == 0)
> +			if (pn->val.throtl.iops == 0)
>  				return 1;
>  		}
>  		break;
> @@ -844,17 +887,17 @@ static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
>  {
>  	switch(oldpn->plid) {
>  	case BLKIO_POLICY_PROP:
> -		oldpn->val.weight = newpn->val.weight;
> +		oldpn->val.prop.weight = newpn->val.prop.weight;
>  		break;
>  	case BLKIO_POLICY_THROTL:
>  		switch(newpn->fileid) {
>  		case BLKIO_THROTL_read_bps_device:
>  		case BLKIO_THROTL_write_bps_device:
> -			oldpn->val.bps = newpn->val.bps;
> +			oldpn->val.throtl.bps = newpn->val.throtl.bps;
>  			break;
>  		case BLKIO_THROTL_read_iops_device:
>  		case BLKIO_THROTL_write_iops_device:
> -			oldpn->val.iops = newpn->val.iops;
> +			oldpn->val.throtl.iops = newpn->val.throtl.iops;
>  		}
>  		break;
>  	default:
> @@ -872,22 +915,24 @@ static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
>  	unsigned int weight, iops;
>  	u64 bps;
>  
> +
>  	switch(pn->plid) {
>  	case BLKIO_POLICY_PROP:
> -		weight = pn->val.weight ? pn->val.weight :
> +		weight = pn->val.prop.weight ? pn->val.prop.weight :
>  				blkcg->weight;
>  		blkio_update_group_weight(blkg, weight);
> +		blkio_update_group_class(blkg, pn->val.prop.class);
>  		break;
>  	case BLKIO_POLICY_THROTL:
>  		switch(pn->fileid) {
>  		case BLKIO_THROTL_read_bps_device:
>  		case BLKIO_THROTL_write_bps_device:
> -			bps = pn->val.bps ? pn->val.bps : (-1);
> +			bps = pn->val.throtl.bps ? pn->val.throtl.bps : (-1);
>  			blkio_update_group_bps(blkg, bps, pn->fileid);
>  			break;
>  		case BLKIO_THROTL_read_iops_device:
>  		case BLKIO_THROTL_write_iops_device:
> -			iops = pn->val.iops ? pn->val.iops : (-1);
> +			iops = pn->val.throtl.iops ? pn->val.throtl.iops : (-1);
>  			blkio_update_group_iops(blkg, iops, pn->fileid);
>  			break;
>  		}
> @@ -984,21 +1029,28 @@ blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn)
>  {
>  	switch(pn->plid) {
>  		case BLKIO_POLICY_PROP:
> -			if (pn->fileid == BLKIO_PROP_weight_device)
> +			switch(pn->fileid) {
> +			case BLKIO_PROP_weight_device:
> +				seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
> +					   MINOR(pn->dev), pn->val.prop.weight);
> +				break;
> +			case BLKIO_PROP_class:
>  				seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
> -					MINOR(pn->dev), pn->val.weight);
> +					   MINOR(pn->dev), pn->val.prop.class);
> +				break;
> +			};
>  			break;
>  		case BLKIO_POLICY_THROTL:
>  			switch(pn->fileid) {
>  			case BLKIO_THROTL_read_bps_device:
>  			case BLKIO_THROTL_write_bps_device:
>  				seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev),
> -					MINOR(pn->dev), pn->val.bps);
> +					   MINOR(pn->dev), pn->val.throtl.bps);
>  				break;
>  			case BLKIO_THROTL_read_iops_device:
>  			case BLKIO_THROTL_write_iops_device:
>  				seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
> -					MINOR(pn->dev), pn->val.iops);
> +					   MINOR(pn->dev), pn->val.throtl.iops);
>  				break;
>  			}
>  			break;
> @@ -1037,6 +1089,7 @@ static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
>  	case BLKIO_POLICY_PROP:
>  		switch(name) {
>  		case BLKIO_PROP_weight_device:
> +		case BLKIO_PROP_class:
>  			blkio_read_policy_node_files(cft, blkcg, m);
>  			return 0;
>  		default:
> @@ -1243,6 +1296,14 @@ struct cftype blkio_files[] = {
>  		.max_write_len = 256,
>  	},
>  	{
> +		.name = "class",
> +		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
> +				BLKIO_PROP_class),
> +		.read_seq_string = blkiocg_file_read,
> +		.write_string = blkiocg_file_write,
> +		.max_write_len = 256,
> +	},
> +	{
>  		.name = "weight",
>  		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
>  				BLKIO_PROP_weight),
> diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
> index ea4861b..0e9cd32 100644
> --- a/block/blk-cgroup.h
> +++ b/block/blk-cgroup.h
> @@ -73,10 +73,18 @@ enum blkg_state_flags {
>  	BLKG_empty,
>  };
>  
> +enum blkcg_class {
> +	BLKIO_RT_CLASS = 1,
> +	BLKIO_BE_CLASS = 2,
> +};
> +
> +#define BLKIO_DEFAULT_CLASS BLKIO_BE_CLASS
> +
>  /* cgroup files owned by proportional weight policy */
>  enum blkcg_file_name_prop {
>  	BLKIO_PROP_weight = 1,
>  	BLKIO_PROP_weight_device,
> +	BLKIO_PROP_class,
>  	BLKIO_PROP_io_service_bytes,
>  	BLKIO_PROP_io_serviced,
>  	BLKIO_PROP_time,
> @@ -157,6 +165,21 @@ struct blkio_group {
>  	struct blkio_group_stats stats;
>  };
>  
> +struct proportion_policy {
> +	unsigned int weight;
> +	unsigned int class;
> +};
> +
> +union throttle_policy {
> +	/*
> +	 * Rate read/write in terms of byptes per second
> +	 * Whether this rate represents read or write is determined
> +	 * by file type "fileid".
> +	 */
> +	u64 bps;
> +	unsigned int iops;
> +};
> +
>  struct blkio_policy_node {
>  	struct list_head node;
>  	dev_t dev;
> @@ -166,14 +189,8 @@ struct blkio_policy_node {
>  	int fileid;
>  
>  	union {
> -		unsigned int weight;
> -		/*
> -		 * Rate read/write in terms of byptes per second
> -		 * Whether this rate represents read or write is determined
> -		 * by file type "fileid".
> -		 */
> -		u64 bps;
> -		unsigned int iops;
> +		struct proportion_policy prop;
> +		union throttle_policy throtl;
>  	} val;
>  };
>  
> @@ -192,6 +209,8 @@ typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
>  
>  typedef void (blkio_update_group_weight_fn) (void *key,
>  			struct blkio_group *blkg, unsigned int weight);
> +typedef void (blkio_update_group_class_fn) (void *key,
> +			struct blkio_group *blkg, unsigned int class);
>  typedef void (blkio_update_group_read_bps_fn) (void * key,
>  			struct blkio_group *blkg, u64 read_bps);
>  typedef void (blkio_update_group_write_bps_fn) (void *key,
> @@ -204,6 +223,7 @@ typedef void (blkio_update_group_write_iops_fn) (void *key,
>  struct blkio_policy_ops {
>  	blkio_unlink_group_fn *blkio_unlink_group_fn;
>  	blkio_update_group_weight_fn *blkio_update_group_weight_fn;
> +	blkio_update_group_class_fn *blkio_update_group_class_fn;
>  	blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
>  	blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
>  	blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index ea83a4f..55e78b7 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -179,6 +179,7 @@ struct cfq_group {
>  	/* group service_tree key */
>  	u64 vdisktime;
>  	unsigned int weight;
> +	enum blkcg_class class;
>  
>  	/* number of cfqq currently on this group */
>  	int nr_cfqq;
> @@ -988,6 +989,12 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
>  	cfqg_of_blkg(blkg)->weight = weight;
>  }
>  
> +void cfq_update_blkio_group_class(void *key, struct blkio_group *blkg,
> +					unsigned int class)
> +{
> +	cfqg_of_blkg(blkg)->class = class;
> +}
> +
>  static struct cfq_group *
>  cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>  {
> @@ -4115,8 +4122,9 @@ static struct elevator_type iosched_cfq = {
>  #ifdef CONFIG_CFQ_GROUP_IOSCHED
>  static struct blkio_policy_type blkio_policy_cfq = {
>  	.ops = {
> -		.blkio_unlink_group_fn =	cfq_unlink_blkio_group,
> -		.blkio_update_group_weight_fn =	cfq_update_blkio_group_weight,
> +		.blkio_unlink_group_fn =	       cfq_unlink_blkio_group,
> +		.blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
> +		.blkio_update_group_class_fn =  cfq_update_blkio_group_class,
>  	},
>  	.plid = BLKIO_POLICY_PROP,
>  };
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ