Message-ID: <086a1238-da03-506c-0e94-ac79c462bbbc@linux.dev>
Date: Thu, 27 Jul 2023 23:35:27 +0800
From: Chengming Zhou <chengming.zhou@...ux.dev>
To: axboe@...nel.dk, tj@...nel.org
Cc: linux-block@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] blk-mq: plug based timestamp caching

Hello Jens and Tejun, does this patch look good to you?

Looking forward to your comments.

Thanks.
On 2023/7/17 16:16, chengming.zhou@...ux.dev wrote:
> From: Chengming Zhou <zhouchengming@...edance.com>
>
> The idea is from Tejun [1], who doesn't like manually optimized timestamp
> reads, so this introduces a plug-based timestamp caching infrastructure,
> which is more generic and performs better. It works because we don't care
> about nanosecond accuracy.
>
> Have the plug init start with the timestamp invalid, and use the
> blk_get_time() helper, which returns the current time when there is no
> plug, and otherwise sets the plug's cached time if not already set and
> returns it. Flushing the plug marks the timestamp invalid again at the
> end.
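>
> As a minimal userspace sketch of this get-or-set pattern (stand-in
> names and types here, not the kernel code):
>
>   #include <stdint.h>
>   #include <time.h>
>
>   struct plug { uint64_t cached_time_ns; };   /* 0 == not cached yet */
>
>   static uint64_t now_ns(void)
>   {
>           struct timespec ts;
>
>           clock_gettime(CLOCK_MONOTONIC, &ts);
>           return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
>   }
>
>   /* First caller within a plug reads the clock; later callers reuse it. */
>   static uint64_t get_time(struct plug *plug)
>   {
>           if (!plug)
>                   return now_ns();
>           if (!plug->cached_time_ns)
>                   plug->cached_time_ns = now_ns();
>           return plug->cached_time_ns;
>   }
>
>   /* Flushing the plug invalidates the cache for the next batch. */
>   static void flush_plug(struct plug *plug)
>   {
>           plug->cached_time_ns = 0;
>   }
>
> Every request allocated under the same plug thus shares one timestamp,
> trading nanosecond precision for fewer clock reads.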
>
> We replace all "alloc_time_ns", "start_time_ns" and "io_start_time_ns"
> settings with the blk_get_time() helper.
>
> The only direct caller of ktime_get_ns() left in blk-mq is at request
> end, which doesn't use the cached timestamp, for better accuracy of the
> completion time.
>
> [1] https://lore.kernel.org/lkml/ZLA7QAfSojxu_FMW@slm.duckdns.org/
>
> Suggested-by: Tejun Heo <tj@...nel.org>
> Suggested-by: Jens Axboe <axboe@...nel.dk>
> Signed-off-by: Chengming Zhou <zhouchengming@...edance.com>
> ---
> block/blk-core.c | 3 +++
> block/blk-mq.c | 22 +++++++++++++++++-----
> include/linux/blkdev.h | 2 ++
> 3 files changed, 22 insertions(+), 5 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 90de50082146..a63d33af7287 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1054,6 +1054,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
> return;
>
> plug->mq_list = NULL;
> + plug->cached_time_ns = 0;
> plug->cached_rq = NULL;
> plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
> plug->rq_count = 0;
> @@ -1153,6 +1154,8 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
> */
> if (unlikely(!rq_list_empty(plug->cached_rq)))
> blk_mq_free_plug_rqs(plug);
> +
> + plug->cached_time_ns = 0;
> }
>
> /**
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index b04ff6f56926..54648bfaab9c 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -311,6 +311,18 @@ void blk_mq_wake_waiters(struct request_queue *q)
> blk_mq_tag_wakeup_all(hctx->tags, true);
> }
>
> +static inline u64 blk_get_time(void)
> +{
> + struct blk_plug *plug = current->plug;
> +
> + if (!plug)
> + return ktime_get_ns();
> +
> + if (!plug->cached_time_ns)
> + plug->cached_time_ns = ktime_get_ns();
> + return plug->cached_time_ns;
> +}
> +
> void blk_rq_init(struct request_queue *q, struct request *rq)
> {
> memset(rq, 0, sizeof(*rq));
> @@ -322,7 +334,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
> RB_CLEAR_NODE(&rq->rb_node);
> rq->tag = BLK_MQ_NO_TAG;
> rq->internal_tag = BLK_MQ_NO_TAG;
> - rq->start_time_ns = ktime_get_ns();
> + rq->start_time_ns = blk_get_time();
> rq->part = NULL;
> blk_crypto_rq_set_defaults(rq);
> }
> @@ -332,7 +344,7 @@ EXPORT_SYMBOL(blk_rq_init);
> static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns)
> {
> if (blk_mq_need_time_stamp(rq))
> - rq->start_time_ns = ktime_get_ns();
> + rq->start_time_ns = blk_get_time();
> else
> rq->start_time_ns = 0;
>
> @@ -441,7 +453,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
>
> /* alloc_time includes depth and tag waits */
> if (blk_queue_rq_alloc_time(q))
> - alloc_time_ns = ktime_get_ns();
> + alloc_time_ns = blk_get_time();
>
> if (data->cmd_flags & REQ_NOWAIT)
> data->flags |= BLK_MQ_REQ_NOWAIT;
> @@ -624,7 +636,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
>
> /* alloc_time includes depth and tag waits */
> if (blk_queue_rq_alloc_time(q))
> - alloc_time_ns = ktime_get_ns();
> + alloc_time_ns = blk_get_time();
>
> /*
> * If the tag allocator sleeps we could get an allocation for a
> @@ -1235,7 +1247,7 @@ void blk_mq_start_request(struct request *rq)
> trace_block_rq_issue(rq);
>
> if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
> - rq->io_start_time_ns = ktime_get_ns();
> + rq->io_start_time_ns = blk_get_time();
> rq->stats_sectors = blk_rq_sectors(rq);
> rq->rq_flags |= RQF_STATS;
> rq_qos_issue(q, rq);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ed44a997f629..21a3d4d7ab2b 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -961,6 +961,8 @@ void blk_mark_disk_dead(struct gendisk *disk);
> struct blk_plug {
> struct request *mq_list; /* blk-mq requests */
>
> + u64 cached_time_ns;
> +
> /* if ios_left is > 1, we can batch tag/rq allocations */
> struct request *cached_rq;
> unsigned short nr_ios;