[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <367d1851-7580-72a9-d8db-a374459dddf4@bytedance.com>
Date: Fri, 25 Sep 2020 11:55:23 +0800
From: zhenwei pi <pizhenwei@...edance.com>
To: axboe@...nel.dk
Cc: linux-block@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: PING: [PATCH] block: add io_error stat for block device
Hi, Jens
What do you think about adding error stats for a block device?
On 9/10/20 10:20 AM, zhenwei pi wrote:
> Currently, when a block request hits an error, the block layer only
> prints a rate-limited error log. An agent then has to parse the
> kernel log to record what happened.
>
> This patch adds read/write/discard/flush stat counters to record
> I/O errors.
>
> Signed-off-by: zhenwei pi <pizhenwei@...edance.com>
> ---
> block/blk-core.c | 14 +++++++++++---
> block/genhd.c | 19 +++++++++++++++++++
> include/linux/part_stat.h | 1 +
> 3 files changed, 31 insertions(+), 3 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 10c08ac50697..8f1424835700 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1558,9 +1558,17 @@ bool blk_update_request(struct request *req, blk_status_t error,
> req->q->integrity.profile->complete_fn(req, nr_bytes);
> #endif
>
> - if (unlikely(error && !blk_rq_is_passthrough(req) &&
> - !(req->rq_flags & RQF_QUIET)))
> - print_req_error(req, error, __func__);
> + if (unlikely(error && !blk_rq_is_passthrough(req))) {
> + if (op_is_flush(req_op(req)))
> + part_stat_inc(&req->rq_disk->part0,
> + io_errors[STAT_FLUSH]);
> + else
> + part_stat_inc(&req->rq_disk->part0,
> + io_errors[op_stat_group(req_op(req))]);
> +
> + if (!(req->rq_flags & RQF_QUIET))
> + print_req_error(req, error, __func__);
> + }
>
> blk_account_io_completion(req, nr_bytes);
>
> diff --git a/block/genhd.c b/block/genhd.c
> index 99c64641c314..852035095485 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -104,6 +104,7 @@ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat)
> stat->sectors[group] += ptr->sectors[group];
> stat->ios[group] += ptr->ios[group];
> stat->merges[group] += ptr->merges[group];
> + stat->io_errors[group] += ptr->io_errors[group];
> }
>
> stat->io_ticks += ptr->io_ticks;
> @@ -1374,6 +1375,22 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
> return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
> }
>
> +static ssize_t io_error_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + struct hd_struct *p = dev_to_part(dev);
> + struct disk_stats stat;
> +
> + part_stat_read_all(p, &stat);
> +
> + return sprintf(buf,
> + "%8lu %8lu %8lu %8lu\n",
> + stat.io_errors[STAT_READ],
> + stat.io_errors[STAT_WRITE],
> + stat.io_errors[STAT_DISCARD],
> + stat.io_errors[STAT_FLUSH]);
> +}
> +
> static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
> static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
> static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
> @@ -1386,6 +1403,7 @@ static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
> static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
> static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
> static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
> +static DEVICE_ATTR(io_error, 0444, io_error_show, NULL);
>
> #ifdef CONFIG_FAIL_MAKE_REQUEST
> ssize_t part_fail_show(struct device *dev,
> @@ -1437,6 +1455,7 @@ static struct attribute *disk_attrs[] = {
> #ifdef CONFIG_FAIL_IO_TIMEOUT
> &dev_attr_fail_timeout.attr,
> #endif
> + &dev_attr_io_error.attr,
> NULL
> };
>
> diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
> index 24125778ef3e..4fe3836d2308 100644
> --- a/include/linux/part_stat.h
> +++ b/include/linux/part_stat.h
> @@ -9,6 +9,7 @@ struct disk_stats {
> unsigned long sectors[NR_STAT_GROUPS];
> unsigned long ios[NR_STAT_GROUPS];
> unsigned long merges[NR_STAT_GROUPS];
> + unsigned long io_errors[NR_STAT_GROUPS];
> unsigned long io_ticks;
> local_t in_flight[2];
> };
>
--
zhenwei pi
Powered by blists - more mailing lists