lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20091117185742.GA19829@srcf.ucam.org>
Date:	Tue, 17 Nov 2009 18:57:42 +0000
From:	Matthew Garrett <mjg59@...f.ucam.org>
To:	David Zeuthen <david@...ar.dk>
Cc:	Kay Sievers <kay.sievers@...y.org>, linux-kernel@...r.kernel.org,
	axboe@...nel.dk, linux-hotplug@...r.kernel.org
Subject: Re: [PATCH] [RFC] Add support for uevents on block device idle
	changes

Ok. How about something like this? It adds an extra field to the stat 
file and introduces David's suggestion of making it pollable.

commit ba6d4c7ab7940ae8dc11a884281d0a36b20455b9
Author: Matthew Garrett <mjg@...hat.com>
Date:   Mon Nov 16 17:44:03 2009 -0500

    [RFC] Add support for uevents on block device idle changes
    
    Userspace may wish to know whether a given disk is active or idle, for
    example to modify power management policy based on access patterns. This
    patch adds a deferrable timer to the block layer which will fire if the
    disk is idle for a user-definable period of time, marking the disk idle
    in its stat file and waking any process polling that file. Pollers are
    also woken if an access is received while the disk is classified as
    idle.

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 5f3beda..8747f42 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -3,7 +3,7 @@ Date:		February 2008
 Contact:	Jerome Marchand <jmarchan@...hat.com>
 Description:
 		The /sys/block/<disk>/stat files displays the I/O
-		statistics of disk <disk>. They contain 11 fields:
+		statistics of disk <disk>. They contain 12 fields:
 		 1 - reads completed succesfully
 		 2 - reads merged
 		 3 - sectors read
@@ -15,6 +15,7 @@ Description:
 		 9 - I/Os currently in progress
 		10 - time spent doing I/Os (ms)
 		11 - weighted time spent doing I/Os (ms)
+		12 - 1 if the disk is idle, 0 otherwise (see idle_hysteresis)
 		For more details refer Documentation/iostats.txt
 
 
@@ -128,3 +129,12 @@ Description:
 		preferred request size for workloads where sustained
 		throughput is desired.  If no optimal I/O size is
 		reported this file contains 0.
+
+What:		/sys/block/<disk>/idle_hysteresis
+Date:		November 2009
+Contact:	Matthew Garrett <mjg@...hat.com>
+Description:
+		Contains the number of milliseconds to wait after the last
+		access before declaring that a disk is idle. Any access during
+		this time restarts the timer. "0" (the default) disables idle
+		detection, so no events will be generated.
\ No newline at end of file
diff --git a/block/blk-core.c b/block/blk-core.c
index 71da511..f278817 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1452,6 +1452,15 @@ static inline void __generic_make_request(struct bio *bio)
 		if (should_fail_request(bio))
 			goto end_io;
 
+		if (bio->bi_bdev->bd_disk->hysteresis_time &&
+		    bio_has_data(bio) &&
+		    !mod_timer(&bio->bi_bdev->bd_disk->hysteresis_timer,
+			       jiffies+msecs_to_jiffies
+			       (bio->bi_bdev->bd_disk->hysteresis_time))) {
+			bio->bi_bdev->bd_disk->idle = 0;
+			schedule_work(&bio->bi_bdev->bd_disk->idle_notify);
+		}
+
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
diff --git a/block/genhd.c b/block/genhd.c
index 517e433..ea37e48 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -504,6 +504,21 @@ static int exact_lock(dev_t devt, void *data)
 	return 0;
 }
 
+static void disk_idle(unsigned long data)
+{
+	struct gendisk *gd = (struct gendisk *)data;
+
+	gd->idle = 1;
+	schedule_work(&gd->idle_notify);
+}
+
+static void disk_idle_notify_thread(struct work_struct *work)
+{
+	struct gendisk *gd = container_of(work, struct gendisk, idle_notify);
+
+	sysfs_notify(&disk_to_dev(gd)->kobj, NULL, "stat");
+}
+
 /**
  * add_disk - add partitioning information to kernel list
  * @disk: per-device partitioning information
@@ -543,6 +558,10 @@ void add_disk(struct gendisk *disk)
 
 	blk_register_region(disk_devt(disk), disk->minors, NULL,
 			    exact_match, exact_lock, disk);
+
+	init_timer(&disk->hysteresis_timer);
+	setup_timer(&disk->hysteresis_timer, disk_idle, (unsigned long)disk);
+
 	register_disk(disk);
 	blk_register_queue(disk);
 
@@ -861,6 +880,32 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
 	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
 }
 
+static ssize_t disk_idle_hysteresis_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%d\n", disk->hysteresis_time);
+}
+
+static ssize_t disk_idle_hysteresis_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	unsigned long timeout;
+	int res;
+
+	res = strict_strtoul(buf, 10, &timeout);
+	if (res)
+		return -EINVAL;
+
+	disk->hysteresis_time = timeout;
+
+	return count;
+}
+
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
@@ -870,6 +915,8 @@ static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(idle_hysteresis, 0644, disk_idle_hysteresis_show,
+		   disk_idle_hysteresis_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
 	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -890,6 +937,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
 	&dev_attr_inflight.attr,
+	&dev_attr_idle_hysteresis.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
@@ -1183,6 +1231,8 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
 		device_initialize(disk_to_dev(disk));
 		INIT_WORK(&disk->async_notify,
 			media_change_notify_thread);
+		INIT_WORK(&disk->idle_notify,
+			  disk_idle_notify_thread);
 	}
 	return disk;
 }
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e1..cccfb7d 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -230,6 +230,7 @@ ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
+	struct gendisk *gd = dev_to_disk(dev);
 	int cpu;
 
 	cpu = part_stat_lock();
@@ -238,7 +239,7 @@ ssize_t part_stat_show(struct device *dev,
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
-		"%8u %8u %8u"
+		"%8u %8u %8u %1u"
 		"\n",
 		part_stat_read(p, ios[READ]),
 		part_stat_read(p, merges[READ]),
@@ -250,7 +251,8 @@ ssize_t part_stat_show(struct device *dev,
 		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
 		part_in_flight(p),
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
-		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
+		jiffies_to_msecs(part_stat_read(p, time_in_queue)),
+		gd->idle);
 }
 
 ssize_t part_inflight_show(struct device *dev,
@@ -652,6 +654,9 @@ void del_gendisk(struct gendisk *disk)
 	struct disk_part_iter piter;
 	struct hd_struct *part;
 
+	del_timer_sync(&disk->hysteresis_timer);
+	cancel_work_sync(&disk->idle_notify);
+
 	/* invalidate stuff */
 	disk_part_iter_init(&piter, disk,
 			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 297df45..7e969a5 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/kdev_t.h>
 #include <linux/rcupdate.h>
+#include <linux/timer.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -163,10 +164,15 @@ struct gendisk {
 
 	atomic_t sync_io;		/* RAID */
 	struct work_struct async_notify;
+	struct work_struct idle_notify;
 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct blk_integrity *integrity;
 #endif
 	int node_id;
+
+	bool idle;
+	int hysteresis_time;
+	struct timer_list hysteresis_timer;
 };
 
 static inline struct gendisk *part_to_disk(struct hd_struct *part)

-- 
Matthew Garrett | mjg59@...f.ucam.org
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ