[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6fd92550-9252-a3ca-90b7-f55d6e39b63c@yandex-team.ru>
Date: Mon, 2 Mar 2020 16:51:12 +0300
From: Konstantin Khlebnikov <khlebnikov@...dex-team.ru>
To: linux-block@...r.kernel.org, Jens Axboe <axboe@...nel.dk>,
linux-kernel@...r.kernel.org
Cc: Mikulas Patocka <mpatocka@...hat.com>,
Mike Snitzer <snitzer@...hat.com>
Subject: Re: [PATCH 2/3] block/diskstats: accumulate all per-cpu counters in
one pass
On 02/03/2020 13.38, Konstantin Khlebnikov wrote:
> Reading /proc/diskstats iterates over all cpus for summing each field.
> It's faster to sum all fields in one pass.
>
> Hammering /proc/diskstats with fio shows 2x performance improvement:
>
> fio --name=test --numjobs=$JOBS --filename=/proc/diskstats \
> --size=1k --bs=1k --fallocate=none --create_on_open=1 \
> --time_based=1 --runtime=10 --invalidate=0 --group_report
>
> JOBS=1 JOBS=10
> Before: 7k iops 64k iops
> After: 18k iops 120k iops
>
> Also this way code is more compact:
>
> add/remove: 1/0 grow/shrink: 0/2 up/down: 194/-1540 (-1346)
> Function old new delta
> part_stat_read_all - 194 +194
> diskstats_show 1344 631 -713
> part_stat_show 1219 392 -827
> Total: Before=14966947, After=14965601, chg -0.01%
>
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@...dex-team.ru>
> ---
> block/genhd.c | 62 +++++++++++++++++++++++++++++++++------------
> block/partition-generic.c | 35 ++++++++++++++-----------
> include/linux/genhd.h | 10 +++++--
> 3 files changed, 72 insertions(+), 35 deletions(-)
>
> diff --git a/block/genhd.c b/block/genhd.c
> index ff6268970ddc..fcc597c243f7 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -46,6 +46,29 @@ static void disk_add_events(struct gendisk *disk);
> static void disk_del_events(struct gendisk *disk);
> static void disk_release_events(struct gendisk *disk);
>
> +#ifdef CONFIG_SMP
> +void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
> +{
> + int cpu;
> +
> + memset(stat, 0, sizeof(struct disk_stats));
> + for_each_possible_cpu(cpu) {
> + struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu);
> + int group;
> +
> + for (group = 0; group < NR_STAT_GROUPS; group++) {
> + stat->nsecs[group] += ptr->nsecs[group];
> + stat->sectors[group] += ptr->sectors[group];
> + stat->ios[group] += ptr->ios[group];
> + stat->merges[group] += ptr->merges[group];
> + }
> +
> + stat->io_ticks += ptr->io_ticks;
> + stat->time_in_queue += ptr->time_in_queue;
> + }
> +}
> +#endif /* CONFIG_SMP */
> +
> void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
> {
> if (queue_is_mq(q))
> @@ -1369,6 +1392,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
> struct hd_struct *hd;
> char buf[BDEVNAME_SIZE];
> unsigned int inflight;
> + struct disk_stats stat;
>
> /*
> if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
> @@ -1380,7 +1404,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
>
> disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
> while ((hd = disk_part_iter_next(&piter))) {
> + part_stat_read_all(hd, &stat);
> inflight = part_in_flight(gp->queue, hd);
> +
> seq_printf(seqf, "%4d %7d %s "
> "%lu %lu %lu %u "
> "%lu %lu %lu %u "
> @@ -1390,23 +1416,27 @@ static int diskstats_show(struct seq_file *seqf, void *v)
> "\n",
> MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
> disk_name(gp, hd->partno, buf),
> - part_stat_read(hd, ios[STAT_READ]),
> - part_stat_read(hd, merges[STAT_READ]),
> - part_stat_read(hd, sectors[STAT_READ]),
> - (unsigned int)part_stat_read_msecs(hd, STAT_READ),
> - part_stat_read(hd, ios[STAT_WRITE]),
> - part_stat_read(hd, merges[STAT_WRITE]),
> - part_stat_read(hd, sectors[STAT_WRITE]),
> - (unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
> + stat.ios[STAT_READ],
> + stat.merges[STAT_READ],
> + stat.sectors[STAT_READ],
> + (unsigned int)div_u64(stat.nsecs[STAT_READ],
> + NSEC_PER_MSEC),
> + stat.ios[STAT_WRITE],
> + stat.merges[STAT_WRITE],
> + stat.sectors[STAT_WRITE],
> + (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
> + NSEC_PER_MSEC),
> inflight,
> - jiffies_to_msecs(part_stat_read(hd, io_ticks)),
> - jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
> - part_stat_read(hd, ios[STAT_DISCARD]),
> - part_stat_read(hd, merges[STAT_DISCARD]),
> - part_stat_read(hd, sectors[STAT_DISCARD]),
> - (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
> - part_stat_read(hd, ios[STAT_FLUSH]),
> - (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
> + jiffies_to_msecs(stat.io_ticks),
> + jiffies_to_msecs(stat.time_in_queue),
> + stat.ios[STAT_DISCARD],
> + stat.merges[STAT_DISCARD],
> + stat.sectors[STAT_DISCARD],
> + (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
> + NSEC_PER_MSEC),
> + stat.ios[STAT_FLUSH],
> + (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
> + NSEC_PER_MSEC)
> );
> }
> disk_part_iter_exit(&piter);
> diff --git a/block/partition-generic.c b/block/partition-generic.c
> index 564fae77711d..6d3fcb5187cb 100644
> --- a/block/partition-generic.c
> +++ b/block/partition-generic.c
> @@ -120,9 +120,12 @@ ssize_t part_stat_show(struct device *dev,
> {
> struct hd_struct *p = dev_to_part(dev);
> struct request_queue *q = part_to_disk(p)->queue;
> + struct disk_stats stat;
> unsigned int inflight;
>
> + part_stat_read_all(p, &stat);
> inflight = part_in_flight(q, p);
> +
> return sprintf(buf,
> "%8lu %8lu %8llu %8u "
> "%8lu %8lu %8llu %8u "
> @@ -130,23 +133,23 @@ ssize_t part_stat_show(struct device *dev,
> "%8lu %8lu %8llu %8u "
> "%8lu %8u"
> "\n",
> - part_stat_read(p, ios[STAT_READ]),
> - part_stat_read(p, merges[STAT_READ]),
> - (unsigned long long)part_stat_read(p, sectors[STAT_READ]),
> - (unsigned int)part_stat_read_msecs(p, STAT_READ),
> - part_stat_read(p, ios[STAT_WRITE]),
> - part_stat_read(p, merges[STAT_WRITE]),
> - (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
> - (unsigned int)part_stat_read_msecs(p, STAT_WRITE),
> + stat.ios[STAT_READ],
> + stat.merges[STAT_READ],
> + (unsigned long long)stat.sectors[STAT_READ],
> + (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
> + stat.ios[STAT_WRITE],
> + stat.merges[STAT_WRITE],
> + (unsigned long long)stat.sectors[STAT_WRITE],
> + (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
> inflight,
> - jiffies_to_msecs(part_stat_read(p, io_ticks)),
> - jiffies_to_msecs(part_stat_read(p, time_in_queue)),
> - part_stat_read(p, ios[STAT_DISCARD]),
> - part_stat_read(p, merges[STAT_DISCARD]),
> - (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
> - (unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
> - part_stat_read(p, ios[STAT_FLUSH]),
> - (unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
> + jiffies_to_msecs(stat.io_ticks),
> + jiffies_to_msecs(stat.time_in_queue),
> + stat.ios[STAT_DISCARD],
> + stat.merges[STAT_DISCARD],
> + (unsigned long long)stat.sectors[STAT_DISCARD],
> + (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
> + stat.ios[STAT_FLUSH],
> + (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
> }
>
> ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index d5ff7023daa8..f1fca2c57092 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -339,6 +339,8 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
> sizeof(struct disk_stats));
> }
>
> +void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat);
> +
> static inline int init_part_stats(struct hd_struct *part)
> {
> part->dkstats = alloc_percpu(struct disk_stats);
> @@ -365,6 +367,11 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
> memset(&part->dkstats, value, sizeof(struct disk_stats));
> }
>
> +void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
^-- forgot "static inline" here
> +{
> + memcpy(stat, &part->dkstats, sizeof(struct disk_stats));
> +}
> +
> static inline int init_part_stats(struct hd_struct *part)
> {
> return 1;
> @@ -376,9 +383,6 @@ static inline void free_part_stats(struct hd_struct *part)
>
> #endif /* CONFIG_SMP */
>
> -#define part_stat_read_msecs(part, which) \
> - div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC)
> -
> #define part_stat_read_accum(part, field) \
> (part_stat_read(part, field[STAT_READ]) + \
> part_stat_read(part, field[STAT_WRITE]) + \
>
Powered by blists - more mailing lists