lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANubcdWajHf_vJQpcsmLvU8U=Vd9Q2=9E4_mpncRy2A0iiMnWQ@mail.gmail.com>
Date: Mon, 19 Jan 2026 14:53:53 +0800
From: Stephen Zhang <starzhangzsd@...il.com>
To: Coly Li <colyli@...as.com>
Cc: Christoph Hellwig <hch@...radead.org>, kent.overstreet@...ux.dev, axboe@...nel.dk, 
	sashal@...nel.org, linux-bcache@...r.kernel.org, linux-kernel@...r.kernel.org, 
	zhangshida@...inos.cn
Subject: Re: [PATCH] bcache: fix double bio_endio completion in detached_dev_end_io

Coly Li <colyli@...as.com> 于2026年1月18日周日 19:49写道:
>
> On Thu, Jan 15, 2026 at 12:29:15AM +0800, Christoph Hellwig wrote:
> > Can you please test this patch?
> >
>
> Shida,
> can you also test it and confirm? We need to get the fix in within 6.19 cycle.
>
> Yes, we need to make a dicision ASAP.
> I prefer the clone bio solution, and it looks fine to me.
>
> Thanks in advance.
>
> Coly Li
>
>
>
>
> > commit d14f13516f60424f3cffc6d1837be566e360a8a3
> > Author: Christoph Hellwig <hch@....de>
> > Date:   Tue Jan 13 09:53:34 2026 +0100
> >
> >     bcache: clone bio in detached_dev_do_request
> >
> >     Not-yet-Signed-off-by: Christoph Hellwig <hch@....de>
> >

Thank you, Coly and Christoph, for giving me the opportunity to continue
your outstanding work on this patch.

If given the chance to complete the next steps, I will begin by adding a
proper commit message:

bcache: use bio cloning for detached device requests

Previously, bcache hijacked the bi_end_io and bi_private fields of the incoming
bio when the backing device was in a detached state. This is fragile and
breaks if the bio is needed to be processed by other layers.

This patch transitions to using a cloned bio embedded within a private
structure.
This ensures the original bio's metadata remains untouched.

Co-developed-by: Christoph Hellwig <hch@....de>
Signed-off-by: Christoph Hellwig <hch@....de>
Signed-off-by: Shida Zhang <zhangshida@...inos.cn>


Additionally, I would like to conduct a thorough code review to identify any
potential issues that may not be easily caught through normal testing.

> > diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
> > index 82fdea7dea7a..9e7b59121313 100644
> > --- a/drivers/md/bcache/request.c
> > +++ b/drivers/md/bcache/request.c
> > @@ -1078,67 +1078,66 @@ static CLOSURE_CALLBACK(cached_dev_nodata)
> >  }
> >
> >  struct detached_dev_io_private {
> > -     struct bcache_device    *d;
> >       unsigned long           start_time;
> > -     bio_end_io_t            *bi_end_io;
> > -     void                    *bi_private;
> > -     struct block_device     *orig_bdev;
> > +     struct bio              *orig_bio;
> > +     struct bio              bio;
> >  };
> >
> >  static void detached_dev_end_io(struct bio *bio)
> >  {
> > -     struct detached_dev_io_private *ddip;
> > -
> > -     ddip = bio->bi_private;
> > -     bio->bi_end_io = ddip->bi_end_io;
> > -     bio->bi_private = ddip->bi_private;
> > +     struct detached_dev_io_private *ddip =
> > +             container_of(bio, struct detached_dev_io_private, bio);
> > +     struct bio *orig_bio = ddip->orig_bio;
> >
> >       /* Count on the bcache device */
> > -     bio_end_io_acct_remapped(bio, ddip->start_time, ddip->orig_bdev);
> > +     bio_end_io_acct(orig_bio, ddip->start_time);
> >
> >       if (bio->bi_status) {
> > -             struct cached_dev *dc = container_of(ddip->d,
> > -                                                  struct cached_dev, disk);
> > +             struct cached_dev *dc = bio->bi_private;
> > +
> >               /* should count I/O error for backing device here */
> >               bch_count_backing_io_errors(dc, bio);
> > +             orig_bio->bi_status = bio->bi_status;
> >       }
> >

bio_init_clone must be paired with a bio_uninit() call before the
memory is freed?

+ bio_uninit(bio);


Thanks,
Shida

> >       kfree(ddip);
> > -     bio_endio(bio);
> > +     bio_endio(orig_bio);
> >  }
> >
> > -static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
> > -             struct block_device *orig_bdev, unsigned long start_time)
> > +static void detached_dev_do_request(struct bcache_device *d,
> > +             struct bio *orig_bio, unsigned long start_time)
> >  {
> >       struct detached_dev_io_private *ddip;
> >       struct cached_dev *dc = container_of(d, struct cached_dev, disk);
> >
> > +     if (bio_op(orig_bio) == REQ_OP_DISCARD &&
> > +         !bdev_max_discard_sectors(dc->bdev)) {
> > +             bio_endio(orig_bio);
> > +             return;
> > +     }
> > +
> >       /*
> >        * no need to call closure_get(&dc->disk.cl),
> >        * because upper layer had already opened bcache device,
> >        * which would call closure_get(&dc->disk.cl)
> >        */
> >       ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
> > -     if (!ddip) {
> > -             bio->bi_status = BLK_STS_RESOURCE;
> > -             bio_endio(bio);
> > -             return;
> > -     }
> > -
> > -     ddip->d = d;
> > +     if (!ddip)
> > +             goto enomem;
> > +     if (bio_init_clone(dc->bdev, &ddip->bio, orig_bio, GFP_NOIO))
> > +             goto free_ddip;
> >       /* Count on the bcache device */
> > -     ddip->orig_bdev = orig_bdev;
> >       ddip->start_time = start_time;
> > -     ddip->bi_end_io = bio->bi_end_io;
> > -     ddip->bi_private = bio->bi_private;
> > -     bio->bi_end_io = detached_dev_end_io;
> > -     bio->bi_private = ddip;
> > -
> > -     if ((bio_op(bio) == REQ_OP_DISCARD) &&
> > -         !bdev_max_discard_sectors(dc->bdev))
> > -             detached_dev_end_io(bio);
> > -     else
> > -             submit_bio_noacct(bio);
> > +     ddip->orig_bio = orig_bio;
> > +     ddip->bio.bi_end_io = detached_dev_end_io;
> > +     ddip->bio.bi_private = dc;
> > +     submit_bio_noacct(&ddip->bio);
> > +     return;
> > +free_ddip:
> > +     kfree(ddip);
> > +enomem:
> > +     orig_bio->bi_status = BLK_STS_RESOURCE;
> > +     bio_endio(orig_bio);
> >  }
> >
> >  static void quit_max_writeback_rate(struct cache_set *c,
> > @@ -1214,10 +1213,10 @@ void cached_dev_submit_bio(struct bio *bio)
> >
> >       start_time = bio_start_io_acct(bio);
> >
> > -     bio_set_dev(bio, dc->bdev);
> >       bio->bi_iter.bi_sector += dc->sb.data_offset;
> >
> >       if (cached_dev_get(dc)) {
> > +             bio_set_dev(bio, dc->bdev);
> >               s = search_alloc(bio, d, orig_bdev, start_time);
> >               trace_bcache_request_start(s->d, bio);
> >
> > @@ -1237,9 +1236,10 @@ void cached_dev_submit_bio(struct bio *bio)
> >                       else
> >                               cached_dev_read(dc, s);
> >               }
> > -     } else
> > +     } else {
> >               /* I/O request sent to backing device */
> > -             detached_dev_do_request(d, bio, orig_bdev, start_time);
> > +             detached_dev_do_request(d, bio, start_time);
> > +     }
> >  }
> >
> >  static int cached_dev_ioctl(struct bcache_device *d, blk_mode_t mode,

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ