[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANubcdWajHf_vJQpcsmLvU8U=Vd9Q2=9E4_mpncRy2A0iiMnWQ@mail.gmail.com>
Date: Mon, 19 Jan 2026 14:53:53 +0800
From: Stephen Zhang <starzhangzsd@...il.com>
To: Coly Li <colyli@...as.com>
Cc: Christoph Hellwig <hch@...radead.org>, kent.overstreet@...ux.dev, axboe@...nel.dk,
sashal@...nel.org, linux-bcache@...r.kernel.org, linux-kernel@...r.kernel.org,
zhangshida@...inos.cn
Subject: Re: [PATCH] bcache: fix double bio_endio completion in detached_dev_end_io
Coly Li <colyli@...as.com> 于2026年1月18日周日 19:49写道:
>
> On Thu, Jan 15, 2026 at 12:29:15AM +0800, Christoph Hellwig wrote:
> > Can you please test this patch?
> >
>
> Shida,
> can you also test it and confirm? We need to get the fix in within 6.19 cycle.
>
> Yes, we need to make a dicision ASAP.
> I prefer the clone bio solution, and it looks fine to me.
>
> Thanks in advance.
>
> Coly Li
>
>
>
>
> > commit d14f13516f60424f3cffc6d1837be566e360a8a3
> > Author: Christoph Hellwig <hch@....de>
> > Date: Tue Jan 13 09:53:34 2026 +0100
> >
> > bcache: clone bio in detached_dev_do_request
> >
> > Not-yet-Signed-off-by: Christoph Hellwig <hch@....de>
> >
Thank you, Coly and Christoph, for giving me the opportunity to continue
your outstanding work on this patch.
If given the chance to complete the next steps, I will begin by adding a
proper commit message:
bcache: use bio cloning for detached device requests
Previously, bcache hijacked the bi_end_io and bi_private fields of the incoming
bio when the backing device was in a detached state. This is fragile and
breaks if the bio is needed to be processed by other layers.
This patch transitions to using a cloned bio embedded within a private
structure.
This ensures the original bio's metadata remains untouched.
Co-developed-by: Christoph Hellwig <hch@....de>
Signed-off-by: Christoph Hellwig <hch@....de>
Signed-off-by: Shida Zhang <zhangshida@...inos.cn>
Additionally, I would like to conduct a thorough code review to identify any
potential issues that may not be easily caught through normal testing.
> > diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
> > index 82fdea7dea7a..9e7b59121313 100644
> > --- a/drivers/md/bcache/request.c
> > +++ b/drivers/md/bcache/request.c
> > @@ -1078,67 +1078,66 @@ static CLOSURE_CALLBACK(cached_dev_nodata)
> > }
> >
> > struct detached_dev_io_private {
> > - struct bcache_device *d;
> > unsigned long start_time;
> > - bio_end_io_t *bi_end_io;
> > - void *bi_private;
> > - struct block_device *orig_bdev;
> > + struct bio *orig_bio;
> > + struct bio bio;
> > };
> >
> > static void detached_dev_end_io(struct bio *bio)
> > {
> > - struct detached_dev_io_private *ddip;
> > -
> > - ddip = bio->bi_private;
> > - bio->bi_end_io = ddip->bi_end_io;
> > - bio->bi_private = ddip->bi_private;
> > + struct detached_dev_io_private *ddip =
> > + container_of(bio, struct detached_dev_io_private, bio);
> > + struct bio *orig_bio = ddip->orig_bio;
> >
> > /* Count on the bcache device */
> > - bio_end_io_acct_remapped(bio, ddip->start_time, ddip->orig_bdev);
> > + bio_end_io_acct(orig_bio, ddip->start_time);
> >
> > if (bio->bi_status) {
> > - struct cached_dev *dc = container_of(ddip->d,
> > - struct cached_dev, disk);
> > + struct cached_dev *dc = bio->bi_private;
> > +
> > /* should count I/O error for backing device here */
> > bch_count_backing_io_errors(dc, bio);
> > + orig_bio->bi_status = bio->bi_status;
> > }
> >
bio_init_clone must be paired with a bio_uninit() call before the
memory is freed?
+ bio_uninit(bio);
Thanks,
Shida
> > kfree(ddip);
> > - bio_endio(bio);
> > + bio_endio(orig_bio);
> > }
> >
> > -static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
> > - struct block_device *orig_bdev, unsigned long start_time)
> > +static void detached_dev_do_request(struct bcache_device *d,
> > + struct bio *orig_bio, unsigned long start_time)
> > {
> > struct detached_dev_io_private *ddip;
> > struct cached_dev *dc = container_of(d, struct cached_dev, disk);
> >
> > + if (bio_op(orig_bio) == REQ_OP_DISCARD &&
> > + !bdev_max_discard_sectors(dc->bdev)) {
> > + bio_endio(orig_bio);
> > + return;
> > + }
> > +
> > /*
> > * no need to call closure_get(&dc->disk.cl),
> > * because upper layer had already opened bcache device,
> > * which would call closure_get(&dc->disk.cl)
> > */
> > ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
> > - if (!ddip) {
> > - bio->bi_status = BLK_STS_RESOURCE;
> > - bio_endio(bio);
> > - return;
> > - }
> > -
> > - ddip->d = d;
> > + if (!ddip)
> > + goto enomem;
> > + if (bio_init_clone(dc->bdev, &ddip->bio, orig_bio, GFP_NOIO))
> > + goto free_ddip;
> > /* Count on the bcache device */
> > - ddip->orig_bdev = orig_bdev;
> > ddip->start_time = start_time;
> > - ddip->bi_end_io = bio->bi_end_io;
> > - ddip->bi_private = bio->bi_private;
> > - bio->bi_end_io = detached_dev_end_io;
> > - bio->bi_private = ddip;
> > -
> > - if ((bio_op(bio) == REQ_OP_DISCARD) &&
> > - !bdev_max_discard_sectors(dc->bdev))
> > - detached_dev_end_io(bio);
> > - else
> > - submit_bio_noacct(bio);
> > + ddip->orig_bio = orig_bio;
> > + ddip->bio.bi_end_io = detached_dev_end_io;
> > + ddip->bio.bi_private = dc;
> > + submit_bio_noacct(&ddip->bio);
> > + return;
> > +free_ddip:
> > + kfree(ddip);
> > +enomem:
> > + orig_bio->bi_status = BLK_STS_RESOURCE;
> > + bio_endio(orig_bio);
> > }
> >
> > static void quit_max_writeback_rate(struct cache_set *c,
> > @@ -1214,10 +1213,10 @@ void cached_dev_submit_bio(struct bio *bio)
> >
> > start_time = bio_start_io_acct(bio);
> >
> > - bio_set_dev(bio, dc->bdev);
> > bio->bi_iter.bi_sector += dc->sb.data_offset;
> >
> > if (cached_dev_get(dc)) {
> > + bio_set_dev(bio, dc->bdev);
> > s = search_alloc(bio, d, orig_bdev, start_time);
> > trace_bcache_request_start(s->d, bio);
> >
> > @@ -1237,9 +1236,10 @@ void cached_dev_submit_bio(struct bio *bio)
> > else
> > cached_dev_read(dc, s);
> > }
> > - } else
> > + } else {
> > /* I/O request sent to backing device */
> > - detached_dev_do_request(d, bio, orig_bdev, start_time);
> > + detached_dev_do_request(d, bio, start_time);
> > + }
> > }
> >
> > static int cached_dev_ioctl(struct bcache_device *d, blk_mode_t mode,
Powered by blists - more mailing lists