lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 2 Dec 2010 10:49:32 +0100 (CET)
From:	Lukas Czerner <lczerner@...hat.com>
To:	"Darrick J. Wong" <djwong@...ibm.com>
cc:	Jens Axboe <axboe@...nel.dk>, "Theodore Ts'o" <tytso@....edu>,
	Neil Brown <neilb@...e.de>,
	Andreas Dilger <adilger.kernel@...ger.ca>,
	Alasdair G Kergon <agk@...hat.com>, Jan Kara <jack@...e.cz>,
	Mike Snitzer <snitzer@...hat.com>,
	linux-kernel <linux-kernel@...r.kernel.org>,
	linux-raid@...r.kernel.org, Keith Mannthey <kmannth@...ibm.com>,
	dm-devel@...hat.com, Mingming Cao <cmm@...ibm.com>,
	Tejun Heo <tj@...nel.org>, linux-ext4@...r.kernel.org,
	Ric Wheeler <rwheeler@...hat.com>,
	Christoph Hellwig <hch@....de>, Josef Bacik <josef@...hat.com>
Subject: Re: [PATCH 1/4] block: Measure flush round-trip times and report
 average value

On Mon, 29 Nov 2010, Darrick J. Wong wrote:

> This patch adds to the block layer the ability to intercept flush requests for
> the purpose of measuring the round-trip times of the write-cache flush command
> on whatever hardware sits underneath the block layer.  The eventual intent of
> this patch is for higher-level clients (filesystems) to be able to measure
> flush times to tune their use of them.
> 
> Signed-off-by: Darrick J. Wong <djwong@...ibm.com>
> ---
>  block/blk-core.c          |   13 +++++++++++++
>  block/genhd.c             |   39 +++++++++++++++++++++++++++++++++++++++
>  fs/bio.c                  |   11 +++++++++++
>  include/linux/blk_types.h |    2 ++
>  include/linux/blkdev.h    |    2 ++
>  include/linux/genhd.h     |   23 +++++++++++++++++++++++
>  6 files changed, 90 insertions(+), 0 deletions(-)
> 
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 4ce953f..233573a 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1540,6 +1540,9 @@ static inline void __generic_make_request(struct bio *bio)
>  
>  		trace_block_bio_queue(q, bio);
>  
> +		if (bio->bi_rw & REQ_FLUSH)
> +			bio->flush_start_time_ns = ktime_to_ns(ktime_get());
> +
>  		ret = q->make_request_fn(q, bio);
>  	} while (ret);
>  
> @@ -1954,6 +1957,9 @@ void blk_start_request(struct request *req)
>  		req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
>  
>  	blk_add_timer(req);
> +
> +	if (req->cmd_flags & REQ_FLUSH)
> +		req->flush_start_time_ns = ktime_to_ns(ktime_get());
>  }
>  EXPORT_SYMBOL(blk_start_request);
>  
> @@ -2182,6 +2188,13 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
>   */
>  static void blk_finish_request(struct request *req, int error)
>  {
> +	if (!error && req->cmd_flags & REQ_FLUSH) {
> +		u64 delta;
> +
> +		delta = ktime_to_ns(ktime_get()) - req->flush_start_time_ns;
> +		update_flush_times(req->rq_disk, delta);
> +	}
> +
>  	if (blk_rq_tagged(req))
>  		blk_queue_end_tag(req->q, req);
>  
> diff --git a/block/genhd.c b/block/genhd.c
> index 5fa2b44..465bd00 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -538,6 +538,9 @@ void add_disk(struct gendisk *disk)
>  	 */
>  	disk->major = MAJOR(devt);
>  	disk->first_minor = MINOR(devt);
> +	spin_lock_init(&disk->flush_time_lock);
> +	disk->avg_flush_time_ns = 0;
> +	disk->flush_samples = 0;
>  
>  	/* Register BDI before referencing it from bdev */ 
>  	bdi = &disk->queue->backing_dev_info;
> @@ -824,6 +827,37 @@ static ssize_t disk_range_show(struct device *dev,
>  	return sprintf(buf, "%d\n", disk->minors);
>  }
>  
> +static ssize_t disk_flush_samples_show(struct device *dev,
> +				   struct device_attribute *attr, char *buf)
> +{
> +	struct gendisk *disk = dev_to_disk(dev);
> +
> +	return sprintf(buf, "%llu\n", disk->flush_samples);
> +}
> +
> +static ssize_t disk_avg_flush_time_ns_show(struct device *dev,
> +				   struct device_attribute *attr, char *buf)
> +{
> +	struct gendisk *disk = dev_to_disk(dev);
> +
> +	return sprintf(buf, "%llu\n", disk->avg_flush_time_ns);
> +}
> +
> +static ssize_t disk_avg_flush_time_ns_store(struct device *dev,
> +					    struct device_attribute *attr,
> +					    const char *buf, size_t count)

Hi,

This is not exactly helpful name for the function since it does not
store anything, but rather clear the counters. Would not be
disk_avg_flush_time_ns_clear a better name ?

Thanks!

-Lukas

> +{
> +	struct gendisk *disk = dev_to_disk(dev);
> +
> +	spin_lock(&disk->flush_time_lock);
> +	disk->avg_flush_time_ns = 0;
> +	disk->flush_samples = 0;
> +	spin_unlock(&disk->flush_time_lock);
> +
> +	return count;
> +}
> +
> +
>  static ssize_t disk_ext_range_show(struct device *dev,
>  				   struct device_attribute *attr, char *buf)
>  {
> @@ -876,6 +910,9 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
>  }
>  
>  static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
> +static DEVICE_ATTR(avg_flush_time_ns, S_IRUGO | S_IWUSR,
> +		   disk_avg_flush_time_ns_show, disk_avg_flush_time_ns_store);
> +static DEVICE_ATTR(flush_samples, S_IRUGO, disk_flush_samples_show, NULL);
>  static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
>  static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
>  static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
> @@ -898,6 +935,8 @@ static struct device_attribute dev_attr_fail_timeout =
>  
>  static struct attribute *disk_attrs[] = {
>  	&dev_attr_range.attr,
> +	&dev_attr_avg_flush_time_ns.attr,
> +	&dev_attr_flush_samples.attr,
>  	&dev_attr_ext_range.attr,
>  	&dev_attr_removable.attr,
>  	&dev_attr_ro.attr,
> diff --git a/fs/bio.c b/fs/bio.c
> index 4bd454f..aab5f27 100644
> --- a/fs/bio.c
> +++ b/fs/bio.c
> @@ -1442,11 +1442,22 @@ EXPORT_SYMBOL(bio_flush_dcache_pages);
>   **/
>  void bio_endio(struct bio *bio, int error)
>  {
> +	struct request_queue *q = NULL;
> +
>  	if (error)
>  		clear_bit(BIO_UPTODATE, &bio->bi_flags);
>  	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
>  		error = -EIO;
>  
> +	if (bio->bi_bdev)
> +		q = bdev_get_queue(bio->bi_bdev);
> +	if (!(q && q->prep_rq_fn) && !error && bio->bi_rw & REQ_FLUSH) {
> +		u64 delta;
> +
> +		delta = ktime_to_ns(ktime_get()) - bio->flush_start_time_ns;
> +		update_flush_times(bio->bi_bdev->bd_disk, delta);
> +	}
> +
>  	if (bio->bi_end_io)
>  		bio->bi_end_io(bio, error);
>  }
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 46ad519..e8c29b9 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -74,6 +74,8 @@ struct bio {
>  
>  	bio_destructor_t	*bi_destructor;	/* destructor */
>  
> +	u64			flush_start_time_ns;
> +
>  	/*
>  	 * We can inline a number of vecs at the end of the bio, to avoid
>  	 * double allocations for a small number of bio_vecs. This member
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index aae86fd..357d669 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -163,6 +163,8 @@ struct request {
>  
>  	/* for bidi */
>  	struct request *next_rq;
> +
> +	u64 flush_start_time_ns;
>  };
>  
>  static inline unsigned short req_get_ioprio(struct request *req)
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index 7a7b9c1..7097396 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -178,8 +178,31 @@ struct gendisk {
>  	struct blk_integrity *integrity;
>  #endif
>  	int node_id;
> +
> +	spinlock_t flush_time_lock;
> +	u64 avg_flush_time_ns;
> +	u64 flush_samples;
>  };
>  
> +static inline void update_flush_times(struct gendisk *disk, u64 delta)
> +{
> +	u64 data;
> +
> +	spin_lock(&disk->flush_time_lock);
> +	if (disk->flush_samples < 256)
> +		disk->flush_samples++;
> +	if (!disk->avg_flush_time_ns)
> +		disk->avg_flush_time_ns = delta;
> +	else {
> +		data = disk->avg_flush_time_ns;
> +		data *= 255;
> +		data += delta;
> +		data /= 256;
> +		disk->avg_flush_time_ns = data;
> +	}
> +	spin_unlock(&disk->flush_time_lock);
> +}
> +
>  static inline struct gendisk *part_to_disk(struct hd_struct *part)
>  {
>  	if (likely(part)) {
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ