[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20091117185742.GA19829@srcf.ucam.org>
Date: Tue, 17 Nov 2009 18:57:42 +0000
From: Matthew Garrett <mjg59@...f.ucam.org>
To: David Zeuthen <david@...ar.dk>
Cc: Kay Sievers <kay.sievers@...y.org>, linux-kernel@...r.kernel.org,
axboe@...nel.dk, linux-hotplug@...r.kernel.org
Subject: Re: [PATCH] [RFC] Add support for uevents on block device idle
changes
Ok. How about something like this? It adds an extra field to the stat
file and introduces David's suggestion of making it pollable.
commit ba6d4c7ab7940ae8dc11a884281d0a36b20455b9
Author: Matthew Garrett <mjg@...hat.com>
Date: Mon Nov 16 17:44:03 2009 -0500
[RFC] Add support for uevents on block device idle changes
Userspace may wish to know whether a given disk is active or idle, for
example to modify power management policy based on access patterns. This
patch adds a timer to the block layer which will fire if the disk has been
idle for a user-definable period of time (idle_hysteresis, in
milliseconds). When it fires, a new idle field in the disk's stat file is
set and anyone polling that file is woken. Pollers are also woken, and the
idle field cleared, if an access is received while the disk is classified
as idle.
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 5f3beda..8747f42 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -3,7 +3,7 @@ Date: February 2008
Contact: Jerome Marchand <jmarchan@...hat.com>
Description:
The /sys/block/<disk>/stat files displays the I/O
- statistics of disk <disk>. They contain 11 fields:
+ statistics of disk <disk>. They contain 12 fields:
1 - reads completed succesfully
2 - reads merged
3 - sectors read
@@ -15,6 +15,7 @@ Description:
9 - I/Os currently in progress
10 - time spent doing I/Os (ms)
11 - weighted time spent doing I/Os (ms)
+ 12 - 1 if the disk is idle, 0 otherwise (see idle_hysteresis)
For more details refer Documentation/iostats.txt
@@ -128,3 +129,12 @@ Description:
preferred request size for workloads where sustained
throughput is desired. If no optimal I/O size is
reported this file contains 0.
+
+What: /sys/block/<disk>/idle_hysteresis
+Date: November 2009
+Contact: Matthew Garrett <mjg@...hat.com>
+Description:
+ Contains the number of milliseconds to wait after an access
+ before declaring that a disk is idle. Any access during this
+ time restarts the timer. Idle changes wake pollers of the stat
+ file. "0" (the default) means no notifications are generated.
\ No newline at end of file
diff --git a/block/blk-core.c b/block/blk-core.c
index 71da511..f278817 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1452,6 +1452,15 @@ static inline void __generic_make_request(struct bio *bio)
if (should_fail_request(bio))
goto end_io;
+ if (bio->bi_bdev->bd_disk->hysteresis_time &&
+ bio_has_data(bio) &&
+ !mod_timer(&bio->bi_bdev->bd_disk->hysteresis_timer,
+ jiffies+msecs_to_jiffies
+ (bio->bi_bdev->bd_disk->hysteresis_time))) {
+ bio->bi_bdev->bd_disk->idle = 0;
+ schedule_work(&bio->bi_bdev->bd_disk->idle_notify);
+ }
+
/*
* If this device has partitions, remap block n
* of partition p to block n+start(p) of the disk.
diff --git a/block/genhd.c b/block/genhd.c
index 517e433..ea37e48 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -504,6 +504,21 @@ static int exact_lock(dev_t devt, void *data)
return 0;
}
+static void disk_idle(unsigned long data)
+{
+ struct gendisk *gd = (struct gendisk *)data;
+
+ gd->idle = 1;
+ schedule_work(&gd->idle_notify);
+}
+
+static void disk_idle_notify_thread(struct work_struct *work)
+{
+ struct gendisk *gd = container_of(work, struct gendisk, idle_notify);
+
+ sysfs_notify(&disk_to_dev(gd)->kobj, NULL, "stat");
+}
+
/**
* add_disk - add partitioning information to kernel list
* @disk: per-device partitioning information
@@ -543,6 +558,10 @@ void add_disk(struct gendisk *disk)
blk_register_region(disk_devt(disk), disk->minors, NULL,
exact_match, exact_lock, disk);
+
+ init_timer(&disk->hysteresis_timer);
+ setup_timer(&disk->hysteresis_timer, disk_idle, (unsigned long)disk);
+
register_disk(disk);
blk_register_queue(disk);
@@ -861,6 +880,32 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
}
+static ssize_t disk_idle_hysteresis_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return sprintf(buf, "%d\n", disk->hysteresis_time);
+}
+
+static ssize_t disk_idle_hysteresis_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ unsigned long timeout;
+ int res;
+
+ res = strict_strtoul(buf, 10, &timeout);
+ if (res)
+ return -EINVAL;
+
+ disk->hysteresis_time = timeout;
+
+ return count;
+}
+
static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
@@ -870,6 +915,8 @@ static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(idle_hysteresis, 0644, disk_idle_hysteresis_show,
+ disk_idle_hysteresis_store);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -890,6 +937,7 @@ static struct attribute *disk_attrs[] = {
&dev_attr_capability.attr,
&dev_attr_stat.attr,
&dev_attr_inflight.attr,
+ &dev_attr_idle_hysteresis.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
#endif
@@ -1183,6 +1231,8 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
device_initialize(disk_to_dev(disk));
INIT_WORK(&disk->async_notify,
media_change_notify_thread);
+ INIT_WORK(&disk->idle_notify,
+ disk_idle_notify_thread);
}
return disk;
}
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e1..cccfb7d 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -230,6 +230,7 @@ ssize_t part_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
+ struct gendisk *gd = dev_to_disk(dev);
int cpu;
cpu = part_stat_lock();
@@ -238,7 +239,7 @@ ssize_t part_stat_show(struct device *dev,
return sprintf(buf,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
- "%8u %8u %8u"
+ "%8u %8u %8u %1u"
"\n",
part_stat_read(p, ios[READ]),
part_stat_read(p, merges[READ]),
@@ -250,7 +251,8 @@ ssize_t part_stat_show(struct device *dev,
jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
part_in_flight(p),
jiffies_to_msecs(part_stat_read(p, io_ticks)),
- jiffies_to_msecs(part_stat_read(p, time_in_queue)));
+ jiffies_to_msecs(part_stat_read(p, time_in_queue)),
+ gd->idle);
}
ssize_t part_inflight_show(struct device *dev,
@@ -652,6 +654,9 @@ void del_gendisk(struct gendisk *disk)
struct disk_part_iter piter;
struct hd_struct *part;
+ del_timer_sync(&disk->hysteresis_timer);
+ cancel_work_sync(&disk->idle_notify);
+
/* invalidate stuff */
disk_part_iter_init(&piter, disk,
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 297df45..7e969a5 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/kdev_t.h>
#include <linux/rcupdate.h>
+#include <linux/timer.h>
#ifdef CONFIG_BLOCK
@@ -163,10 +164,15 @@ struct gendisk {
atomic_t sync_io; /* RAID */
struct work_struct async_notify;
+ struct work_struct idle_notify;
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity *integrity;
#endif
int node_id;
+
+ bool idle;
+ int hysteresis_time;
+ struct timer_list hysteresis_timer;
};
static inline struct gendisk *part_to_disk(struct hd_struct *part)
--
Matthew Garrett | mjg59@...f.ucam.org
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists