[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZJuzYMeVhP5cthbC@ovpn-8-21.pek2.redhat.com>
Date: Wed, 28 Jun 2023 12:13:20 +0800
From: Ming Lei <ming.lei@...hat.com>
To: chengming.zhou@...ux.dev
Cc: axboe@...nel.dk, tj@...nel.org, hch@....de,
linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
zhouchengming@...edance.com
Subject: Re: [PATCH 2/4] blk-flush: count inflight flush_data requests
On Tue, Jun 27, 2023 at 08:08:52PM +0800, chengming.zhou@...ux.dev wrote:
> From: Chengming Zhou <zhouchengming@...edance.com>
>
> The flush state machine uses a doubly linked list to link all inflight
> flush_data requests, to avoid issuing separate post-flushes for
> these flush_data requests which share a PREFLUSH.
>
> So we can't reuse rq->queuelist; this is why we need rq->flush.list.
>
> In preparation for the next patch, which reuses rq->queuelist for the flush
> state machine, we change the doubly linked list to a u64 counter,
> which counts all inflight flush_data requests.
>
> This is ok since we only need to know if there is any inflight
> flush_data request, so a u64 counter is good enough. The only problem I can
> think of is that the u64 counter may overflow, which is unlikely to happen.
It won't overflow: q->nr_requests is 'unsigned long', and it should have
been limited to a more reasonable value, such as 2 * BLK_MQ_MAX_DEPTH, so
u16 should be big enough in theory.
>
> Signed-off-by: Chengming Zhou <zhouchengming@...edance.com>
> ---
> block/blk-flush.c | 9 +++++----
> block/blk.h | 5 ++---
> 2 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/block/blk-flush.c b/block/blk-flush.c
> index dba392cf22be..bb7adfc2a5da 100644
> --- a/block/blk-flush.c
> +++ b/block/blk-flush.c
> @@ -187,7 +187,8 @@ static void blk_flush_complete_seq(struct request *rq,
> break;
>
> case REQ_FSEQ_DATA:
> - list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
> + list_del_init(&rq->flush.list);
> + fq->flush_data_in_flight++;
> spin_lock(&q->requeue_lock);
> list_add_tail(&rq->queuelist, &q->flush_list);
> spin_unlock(&q->requeue_lock);
> @@ -299,7 +300,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
> return;
>
> /* C2 and C3 */
> - if (!list_empty(&fq->flush_data_in_flight) &&
> + if (fq->flush_data_in_flight &&
> time_before(jiffies,
> fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
> return;
> @@ -374,6 +375,7 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
> * the comment in flush_end_io().
> */
> spin_lock_irqsave(&fq->mq_flush_lock, flags);
> + fq->flush_data_in_flight--;
> blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
> spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
>
> @@ -445,7 +447,7 @@ bool blk_insert_flush(struct request *rq)
> blk_rq_init_flush(rq);
> rq->flush.seq |= REQ_FSEQ_POSTFLUSH;
> spin_lock_irq(&fq->mq_flush_lock);
> - list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
> + fq->flush_data_in_flight++;
> spin_unlock_irq(&fq->mq_flush_lock);
> return false;
> default:
> @@ -496,7 +498,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
>
> INIT_LIST_HEAD(&fq->flush_queue[0]);
> INIT_LIST_HEAD(&fq->flush_queue[1]);
> - INIT_LIST_HEAD(&fq->flush_data_in_flight);
>
> return fq;
>
> diff --git a/block/blk.h b/block/blk.h
> index 608c5dcc516b..686712e13835 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -15,15 +15,14 @@ struct elevator_type;
> extern struct dentry *blk_debugfs_root;
>
> struct blk_flush_queue {
> + spinlock_t mq_flush_lock;
> unsigned int flush_pending_idx:1;
> unsigned int flush_running_idx:1;
> blk_status_t rq_status;
> unsigned long flush_pending_since;
> struct list_head flush_queue[2];
> - struct list_head flush_data_in_flight;
> + unsigned long flush_data_in_flight;
> struct request *flush_rq;
> -
> - spinlock_t mq_flush_lock;
> };
The part that replaces the inflight data rq list with a counter looks fine.
Thanks,
Ming
Powered by blists - more mailing lists