lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 18 Apr 2023 15:13:05 -0700
From:   Sarthak Kukreti <sarthakkukreti@...omium.org>
To:     Brian Foster <bfoster@...hat.com>
Cc:     sarthakkukreti@...gle.com, dm-devel@...hat.com,
        linux-block@...r.kernel.org, linux-ext4@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        Jens Axboe <axboe@...nel.dk>,
        "Michael S. Tsirkin" <mst@...hat.com>,
        Jason Wang <jasowang@...hat.com>,
        Stefan Hajnoczi <stefanha@...hat.com>,
        Alasdair Kergon <agk@...hat.com>,
        Mike Snitzer <snitzer@...nel.org>,
        Christoph Hellwig <hch@...radead.org>,
        "Theodore Ts'o" <tytso@....edu>,
        Andreas Dilger <adilger.kernel@...ger.ca>,
        Bart Van Assche <bvanassche@...gle.com>,
        Daniil Lunev <dlunev@...gle.com>,
        "Darrick J. Wong" <djwong@...nel.org>
Subject: Re: [PATCH v3 1/3] block: Introduce provisioning primitives

On Mon, Apr 17, 2023 at 10:33 AM Brian Foster <bfoster@...hat.com> wrote:
>
> On Thu, Apr 13, 2023 at 05:02:17PM -0700, Sarthak Kukreti wrote:
> > Introduce block request REQ_OP_PROVISION. The intent of this request
> > is to request underlying storage to preallocate disk space for the given
> > block range. Block devices that support this capability will export
> > a provision limit within their request queues.
> >
> > This patch also adds the capability to call fallocate() in mode 0
> > on block devices, which will send REQ_OP_PROVISION to the block
> > device for the specified range,
> >
> > Signed-off-by: Sarthak Kukreti <sarthakkukreti@...omium.org>
> > ---
> >  block/blk-core.c          |  5 ++++
> >  block/blk-lib.c           | 53 +++++++++++++++++++++++++++++++++++++++
> >  block/blk-merge.c         | 18 +++++++++++++
> >  block/blk-settings.c      | 19 ++++++++++++++
> >  block/blk-sysfs.c         |  8 ++++++
> >  block/bounce.c            |  1 +
> >  block/fops.c              | 14 ++++++++---
> >  include/linux/bio.h       |  6 +++--
> >  include/linux/blk_types.h |  5 +++-
> >  include/linux/blkdev.h    | 16 ++++++++++++
> >  10 files changed, 138 insertions(+), 7 deletions(-)
> >
> ...
> > diff --git a/block/fops.c b/block/fops.c
> > index d2e6be4e3d1c..f82da2fb8af0 100644
> > --- a/block/fops.c
> > +++ b/block/fops.c
> > @@ -625,7 +625,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
> >       int error;
> >
> >       /* Fail if we don't recognize the flags. */
> > -     if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
> > +     if (mode != 0 && mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
> >               return -EOPNOTSUPP;
> >
> >       /* Don't go off the end of the device. */
> > @@ -649,11 +649,17 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
> >       filemap_invalidate_lock(inode->i_mapping);
> >
> >       /* Invalidate the page cache, including dirty pages. */
> > -     error = truncate_bdev_range(bdev, file->f_mode, start, end);
> > -     if (error)
> > -             goto fail;
> > +     if (mode != 0) {
> > +             error = truncate_bdev_range(bdev, file->f_mode, start, end);
> > +             if (error)
> > +                     goto fail;
> > +     }
> >
> >       switch (mode) {
> > +     case 0:
> > +             error = blkdev_issue_provision(bdev, start >> SECTOR_SHIFT,
> > +                                            len >> SECTOR_SHIFT, GFP_KERNEL);
> > +             break;
>
> I would think we'd want to support any combination of
> FALLOC_FL_KEEP_SIZE and FALLOC_FL_UNSHARE_RANGE..? All of the other
> commands support the former modifier, for one. It also looks like if
> somebody attempts to invoke with mode == FALLOC_FL_KEEP_SIZE, even with
> the current upstream code that would perform the bdev truncate before
> returning -EOPNOTSUPP. That seems like a bit of an unfortunate side
> effect to me.
>
Added a separate flag set to decide whether we should truncate or not.

> WRT to unshare, if the PROVISION request is always going to imply an
> unshare (which seems reasonable to me), there's probably no reason to
> -EOPNOTSUPP if a caller explicitly passes UNSHARE_RANGE.
>
Added handling in v4.

Thanks!

Sarthak

> Brian
>
> >       case FALLOC_FL_ZERO_RANGE:
> >       case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
> >               error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
> > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > index d766be7152e1..9820b3b039f2 100644
> > --- a/include/linux/bio.h
> > +++ b/include/linux/bio.h
> > @@ -57,7 +57,8 @@ static inline bool bio_has_data(struct bio *bio)
> >           bio->bi_iter.bi_size &&
> >           bio_op(bio) != REQ_OP_DISCARD &&
> >           bio_op(bio) != REQ_OP_SECURE_ERASE &&
> > -         bio_op(bio) != REQ_OP_WRITE_ZEROES)
> > +         bio_op(bio) != REQ_OP_WRITE_ZEROES &&
> > +         bio_op(bio) != REQ_OP_PROVISION)
> >               return true;
> >
> >       return false;
> > @@ -67,7 +68,8 @@ static inline bool bio_no_advance_iter(const struct bio *bio)
> >  {
> >       return bio_op(bio) == REQ_OP_DISCARD ||
> >              bio_op(bio) == REQ_OP_SECURE_ERASE ||
> > -            bio_op(bio) == REQ_OP_WRITE_ZEROES;
> > +            bio_op(bio) == REQ_OP_WRITE_ZEROES ||
> > +            bio_op(bio) == REQ_OP_PROVISION;
> >  }
> >
> >  static inline void *bio_data(struct bio *bio)
> > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> > index 99be590f952f..27bdf88f541c 100644
> > --- a/include/linux/blk_types.h
> > +++ b/include/linux/blk_types.h
> > @@ -385,7 +385,10 @@ enum req_op {
> >       REQ_OP_DRV_IN           = (__force blk_opf_t)34,
> >       REQ_OP_DRV_OUT          = (__force blk_opf_t)35,
> >
> > -     REQ_OP_LAST             = (__force blk_opf_t)36,
> > +     /* request device to provision block */
> > +     REQ_OP_PROVISION        = (__force blk_opf_t)37,
> > +
> > +     REQ_OP_LAST             = (__force blk_opf_t)38,
> >  };
> >
> >  enum req_flag_bits {
> > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > index 941304f17492..239e2f418b6e 100644
> > --- a/include/linux/blkdev.h
> > +++ b/include/linux/blkdev.h
> > @@ -303,6 +303,7 @@ struct queue_limits {
> >       unsigned int            discard_granularity;
> >       unsigned int            discard_alignment;
> >       unsigned int            zone_write_granularity;
> > +     unsigned int            max_provision_sectors;
> >
> >       unsigned short          max_segments;
> >       unsigned short          max_integrity_segments;
> > @@ -921,6 +922,8 @@ extern void blk_queue_max_discard_sectors(struct request_queue *q,
> >               unsigned int max_discard_sectors);
> >  extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
> >               unsigned int max_write_same_sectors);
> > +extern void blk_queue_max_provision_sectors(struct request_queue *q,
> > +             unsigned int max_provision_sectors);
> >  extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
> >  extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
> >               unsigned int max_zone_append_sectors);
> > @@ -1060,6 +1063,9 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
> >  int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
> >               sector_t nr_sects, gfp_t gfp);
> >
> > +extern int blkdev_issue_provision(struct block_device *bdev, sector_t sector,
> > +             sector_t nr_sects, gfp_t gfp_mask);
> > +
> >  #define BLKDEV_ZERO_NOUNMAP  (1 << 0)  /* do not free blocks */
> >  #define BLKDEV_ZERO_NOFALLBACK       (1 << 1)  /* don't write explicit zeroes */
> >
> > @@ -1139,6 +1145,11 @@ static inline unsigned short queue_max_discard_segments(const struct request_que
> >       return q->limits.max_discard_segments;
> >  }
> >
> > +static inline unsigned short queue_max_provision_sectors(const struct request_queue *q)
> > +{
> > +     return q->limits.max_provision_sectors;
> > +}
> > +
> >  static inline unsigned int queue_max_segment_size(const struct request_queue *q)
> >  {
> >       return q->limits.max_segment_size;
> > @@ -1281,6 +1292,11 @@ static inline bool bdev_nowait(struct block_device *bdev)
> >       return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags);
> >  }
> >
> > +static inline unsigned int bdev_max_provision_sectors(struct block_device *bdev)
> > +{
> > +     return bdev_get_queue(bdev)->limits.max_provision_sectors;
> > +}
> > +
> >  static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
> >  {
> >       return blk_queue_zoned_model(bdev_get_queue(bdev));
> > --
> > 2.40.0.634.g4ca3ef3211-goog
> >
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ