lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 21 Mar 2011 18:10:43 -0700
From:	Chad Talbott <ctalbott@...gle.com>
To:	jaxboe@...ionio.com, vgoyal@...hat.com
Cc:	linux-kernel@...r.kernel.org, mrubin@...gle.com,
	teravest@...gle.com, Chad Talbott <ctalbott@...gle.com>
Subject: [PATCH 1/3] cfq-iosched: Fair cross-group preemption (interface and documentation)

blkio.class is a new interface that allows you to specify a cgroup as
requiring low-latency service on a specific block device.  This patch
includes only the interface and documentation.

Signed-off-by: Chad Talbott <ctalbott@...gle.com>
---
 Documentation/cgroups/blkio-controller.txt |   16 ++++
 block/blk-cgroup.c                         |  121 +++++++++++++++++++++-------
 block/blk-cgroup.h                         |   36 +++++++--
 block/cfq-iosched.c                        |   12 +++-
 4 files changed, 145 insertions(+), 40 deletions(-)

diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index 4ed7b5c..7acf9d2 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -169,6 +169,22 @@ Proportional weight policy files
 	  dev     weight
 	  8:16    300
 
+- blkio.class
+	- Specifies the per-cgroup, per-device service class.  There
+	  are currently two service classes: real-time (1) and
+	  best-effort (2).  By default, all groups start as
+	  best-effort.
+
+	  #echo dev_maj:dev_minor class > /path/to/cgroup/blkio.class
+	  Configure class=real-time on /dev/sdb (8:16) in this cgroup
+	  # echo '8:16 1' > blkio.class
+	  # cat blkio.class
+	  8:16    1
+
+	  A real-time group can preempt a best-effort group, only as
+	  long as it's within its assigned weight; i.e. fairness is
+	  preserved.
+
 - blkio.time
 	- disk time allocated to cgroup per device in milliseconds. First
 	  two fields specify the major and minor number of the device and
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 455768a..f2c553e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -129,6 +129,21 @@ blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
 	}
 }
 
+static inline void
+blkio_update_group_class(struct blkio_group *blkg, unsigned int class)
+{
+	struct blkio_policy_type *blkiop;
+
+	list_for_each_entry(blkiop, &blkio_list, list) {
+		/* If this policy does not own the blkg, do not send updates */
+		if (blkiop->plid != blkg->plid)
+			continue;
+		if (blkiop->ops.blkio_update_group_class_fn)
+			blkiop->ops.blkio_update_group_class_fn(blkg->key,
+								blkg, class);
+	}
+}
+
 static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
 				int fileid)
 {
@@ -708,15 +723,29 @@ static int blkio_policy_parse_and_set(char *buf,
 
 	switch (plid) {
 	case BLKIO_POLICY_PROP:
-		ret = strict_strtoul(s[1], 10, &temp);
-		if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
-			temp > BLKIO_WEIGHT_MAX)
-			return -EINVAL;
-
-		newpn->plid = plid;
-		newpn->fileid = fileid;
-		newpn->val.weight = temp;
-		break;
+		switch (fileid) {
+		case BLKIO_PROP_weight_device:
+			ret = strict_strtoul(s[1], 10, &temp);
+			if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
+			    temp > BLKIO_WEIGHT_MAX)
+				return -EINVAL;
+
+			newpn->plid = plid;
+			newpn->fileid = fileid;
+			newpn->val.prop.weight = temp;
+			break;
+		case BLKIO_PROP_class:
+			ret = strict_strtoul(s[1], 10, &temp);
+			if (ret ||
+			    temp < BLKIO_RT_CLASS ||
+			    temp > BLKIO_BE_CLASS)
+				return -EINVAL;
+
+			newpn->plid = plid;
+			newpn->fileid = fileid;
+			newpn->val.prop.class = temp;
+			break;
+		}
 	case BLKIO_POLICY_THROTL:
 		switch(fileid) {
 		case BLKIO_THROTL_read_bps_device:
@@ -727,7 +756,7 @@ static int blkio_policy_parse_and_set(char *buf,
 
 			newpn->plid = plid;
 			newpn->fileid = fileid;
-			newpn->val.bps = bps;
+			newpn->val.throtl.bps = bps;
 			break;
 		case BLKIO_THROTL_read_iops_device:
 		case BLKIO_THROTL_write_iops_device:
@@ -740,7 +769,7 @@ static int blkio_policy_parse_and_set(char *buf,
 
 			newpn->plid = plid;
 			newpn->fileid = fileid;
-			newpn->val.iops = (unsigned int)iops;
+			newpn->val.throtl.iops = (unsigned int)iops;
 			break;
 		}
 		break;
@@ -759,20 +788,34 @@ unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
 	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
 				BLKIO_PROP_weight_device);
 	if (pn)
-		return pn->val.weight;
+		return pn->val.prop.weight;
 	else
 		return blkcg->weight;
 }
 EXPORT_SYMBOL_GPL(blkcg_get_weight);
 
-uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
+enum blkcg_class blkcg_get_class(struct blkio_cgroup *blkcg,
+			     dev_t dev)
 {
 	struct blkio_policy_node *pn;
+	enum blkcg_class class = BLKIO_DEFAULT_CLASS;
+
+	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
+				BLKIO_PROP_class);
+	if (pn)
+		class = pn->val.prop.class;
 
+	return class;
+}
+EXPORT_SYMBOL_GPL(blkcg_get_class);
+
+uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
+{
+	struct blkio_policy_node *pn;
 	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
 				BLKIO_THROTL_read_bps_device);
 	if (pn)
-		return pn->val.bps;
+		return pn->val.throtl.bps;
 	else
 		return -1;
 }
@@ -783,7 +826,7 @@ uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
 	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
 				BLKIO_THROTL_write_bps_device);
 	if (pn)
-		return pn->val.bps;
+		return pn->val.throtl.bps;
 	else
 		return -1;
 }
@@ -795,7 +838,7 @@ unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
 	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
 				BLKIO_THROTL_read_iops_device);
 	if (pn)
-		return pn->val.iops;
+		return pn->val.throtl.iops;
 	else
 		return -1;
 }
@@ -806,7 +849,7 @@ unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
 	pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
 				BLKIO_THROTL_write_iops_device);
 	if (pn)
-		return pn->val.iops;
+		return pn->val.throtl.iops;
 	else
 		return -1;
 }
@@ -816,19 +859,19 @@ static bool blkio_delete_rule_command(struct blkio_policy_node *pn)
 {
 	switch(pn->plid) {
 	case BLKIO_POLICY_PROP:
-		if (pn->val.weight == 0)
+		if (pn->val.prop.weight == 0)
 			return 1;
 		break;
 	case BLKIO_POLICY_THROTL:
 		switch(pn->fileid) {
 		case BLKIO_THROTL_read_bps_device:
 		case BLKIO_THROTL_write_bps_device:
-			if (pn->val.bps == 0)
+			if (pn->val.throtl.bps == 0)
 				return 1;
 			break;
 		case BLKIO_THROTL_read_iops_device:
 		case BLKIO_THROTL_write_iops_device:
-			if (pn->val.iops == 0)
+			if (pn->val.throtl.iops == 0)
 				return 1;
 		}
 		break;
@@ -844,17 +887,17 @@ static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
 {
 	switch(oldpn->plid) {
 	case BLKIO_POLICY_PROP:
-		oldpn->val.weight = newpn->val.weight;
+		oldpn->val.prop.weight = newpn->val.prop.weight;
 		break;
 	case BLKIO_POLICY_THROTL:
 		switch(newpn->fileid) {
 		case BLKIO_THROTL_read_bps_device:
 		case BLKIO_THROTL_write_bps_device:
-			oldpn->val.bps = newpn->val.bps;
+			oldpn->val.throtl.bps = newpn->val.throtl.bps;
 			break;
 		case BLKIO_THROTL_read_iops_device:
 		case BLKIO_THROTL_write_iops_device:
-			oldpn->val.iops = newpn->val.iops;
+			oldpn->val.throtl.iops = newpn->val.throtl.iops;
 		}
 		break;
 	default:
@@ -872,22 +915,24 @@ static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
 	unsigned int weight, iops;
 	u64 bps;
 
+
 	switch(pn->plid) {
 	case BLKIO_POLICY_PROP:
-		weight = pn->val.weight ? pn->val.weight :
+		weight = pn->val.prop.weight ? pn->val.prop.weight :
 				blkcg->weight;
 		blkio_update_group_weight(blkg, weight);
+		blkio_update_group_class(blkg, pn->val.prop.class);
 		break;
 	case BLKIO_POLICY_THROTL:
 		switch(pn->fileid) {
 		case BLKIO_THROTL_read_bps_device:
 		case BLKIO_THROTL_write_bps_device:
-			bps = pn->val.bps ? pn->val.bps : (-1);
+			bps = pn->val.throtl.bps ? pn->val.throtl.bps : (-1);
 			blkio_update_group_bps(blkg, bps, pn->fileid);
 			break;
 		case BLKIO_THROTL_read_iops_device:
 		case BLKIO_THROTL_write_iops_device:
-			iops = pn->val.iops ? pn->val.iops : (-1);
+			iops = pn->val.throtl.iops ? pn->val.throtl.iops : (-1);
 			blkio_update_group_iops(blkg, iops, pn->fileid);
 			break;
 		}
@@ -984,21 +1029,28 @@ blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn)
 {
 	switch(pn->plid) {
 		case BLKIO_POLICY_PROP:
-			if (pn->fileid == BLKIO_PROP_weight_device)
+			switch(pn->fileid) {
+			case BLKIO_PROP_weight_device:
+				seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
+					   MINOR(pn->dev), pn->val.prop.weight);
+				break;
+			case BLKIO_PROP_class:
 				seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
-					MINOR(pn->dev), pn->val.weight);
+					   MINOR(pn->dev), pn->val.prop.class);
+				break;
+			};
 			break;
 		case BLKIO_POLICY_THROTL:
 			switch(pn->fileid) {
 			case BLKIO_THROTL_read_bps_device:
 			case BLKIO_THROTL_write_bps_device:
 				seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev),
-					MINOR(pn->dev), pn->val.bps);
+					   MINOR(pn->dev), pn->val.throtl.bps);
 				break;
 			case BLKIO_THROTL_read_iops_device:
 			case BLKIO_THROTL_write_iops_device:
 				seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
-					MINOR(pn->dev), pn->val.iops);
+					   MINOR(pn->dev), pn->val.throtl.iops);
 				break;
 			}
 			break;
@@ -1037,6 +1089,7 @@ static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
 	case BLKIO_POLICY_PROP:
 		switch(name) {
 		case BLKIO_PROP_weight_device:
+		case BLKIO_PROP_class:
 			blkio_read_policy_node_files(cft, blkcg, m);
 			return 0;
 		default:
@@ -1243,6 +1296,14 @@ struct cftype blkio_files[] = {
 		.max_write_len = 256,
 	},
 	{
+		.name = "class",
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_class),
+		.read_seq_string = blkiocg_file_read,
+		.write_string = blkiocg_file_write,
+		.max_write_len = 256,
+	},
+	{
 		.name = "weight",
 		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
 				BLKIO_PROP_weight),
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index ea4861b..0e9cd32 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -73,10 +73,18 @@ enum blkg_state_flags {
 	BLKG_empty,
 };
 
+enum blkcg_class {
+	BLKIO_RT_CLASS = 1,
+	BLKIO_BE_CLASS = 2,
+};
+
+#define BLKIO_DEFAULT_CLASS BLKIO_BE_CLASS
+
 /* cgroup files owned by proportional weight policy */
 enum blkcg_file_name_prop {
 	BLKIO_PROP_weight = 1,
 	BLKIO_PROP_weight_device,
+	BLKIO_PROP_class,
 	BLKIO_PROP_io_service_bytes,
 	BLKIO_PROP_io_serviced,
 	BLKIO_PROP_time,
@@ -157,6 +165,21 @@ struct blkio_group {
 	struct blkio_group_stats stats;
 };
 
+struct proportion_policy {
+	unsigned int weight;
+	unsigned int class;
+};
+
+union throttle_policy {
+	/*
+	 * Rate read/write in terms of byptes per second
+	 * Whether this rate represents read or write is determined
+	 * by file type "fileid".
+	 */
+	u64 bps;
+	unsigned int iops;
+};
+
 struct blkio_policy_node {
 	struct list_head node;
 	dev_t dev;
@@ -166,14 +189,8 @@ struct blkio_policy_node {
 	int fileid;
 
 	union {
-		unsigned int weight;
-		/*
-		 * Rate read/write in terms of byptes per second
-		 * Whether this rate represents read or write is determined
-		 * by file type "fileid".
-		 */
-		u64 bps;
-		unsigned int iops;
+		struct proportion_policy prop;
+		union throttle_policy throtl;
 	} val;
 };
 
@@ -192,6 +209,8 @@ typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
 
 typedef void (blkio_update_group_weight_fn) (void *key,
 			struct blkio_group *blkg, unsigned int weight);
+typedef void (blkio_update_group_class_fn) (void *key,
+			struct blkio_group *blkg, unsigned int class);
 typedef void (blkio_update_group_read_bps_fn) (void * key,
 			struct blkio_group *blkg, u64 read_bps);
 typedef void (blkio_update_group_write_bps_fn) (void *key,
@@ -204,6 +223,7 @@ typedef void (blkio_update_group_write_iops_fn) (void *key,
 struct blkio_policy_ops {
 	blkio_unlink_group_fn *blkio_unlink_group_fn;
 	blkio_update_group_weight_fn *blkio_update_group_weight_fn;
+	blkio_update_group_class_fn *blkio_update_group_class_fn;
 	blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
 	blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
 	blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ea83a4f..55e78b7 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -179,6 +179,7 @@ struct cfq_group {
 	/* group service_tree key */
 	u64 vdisktime;
 	unsigned int weight;
+	enum blkcg_class class;
 
 	/* number of cfqq currently on this group */
 	int nr_cfqq;
@@ -988,6 +989,12 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
 	cfqg_of_blkg(blkg)->weight = weight;
 }
 
+void cfq_update_blkio_group_class(void *key, struct blkio_group *blkg,
+					unsigned int class)
+{
+	cfqg_of_blkg(blkg)->class = class;
+}
+
 static struct cfq_group *
 cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
 {
@@ -4115,8 +4122,9 @@ static struct elevator_type iosched_cfq = {
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 static struct blkio_policy_type blkio_policy_cfq = {
 	.ops = {
-		.blkio_unlink_group_fn =	cfq_unlink_blkio_group,
-		.blkio_update_group_weight_fn =	cfq_update_blkio_group_weight,
+		.blkio_unlink_group_fn =	       cfq_unlink_blkio_group,
+		.blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
+		.blkio_update_group_class_fn =  cfq_update_blkio_group_class,
 	},
 	.plid = BLKIO_POLICY_PROP,
 };
-- 
1.7.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists