[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2a60eeed-8b7e-13ac-78ed-04ed44e13bca@lightnvm.io>
Date: Mon, 24 Jun 2019 12:36:56 +0200
From: Matias Bjørling <mb@...htnvm.io>
To: Damien Le Moal <Damien.LeMoal@....com>,
"axboe@...com" <axboe@...com>, "hch@....de" <hch@....de>,
Chaitanya Kulkarni <Chaitanya.Kulkarni@....com>,
Dmitry Fomichev <Dmitry.Fomichev@....com>,
Ajay Joshi <Ajay.Joshi@....com>,
Aravind Ramesh <Aravind.Ramesh@....com>,
"martin.petersen@...cle.com" <martin.petersen@...cle.com>,
"James.Bottomley@...senPartnership.com"
<James.Bottomley@...senPartnership.com>,
"agk@...hat.com" <agk@...hat.com>,
"snitzer@...hat.com" <snitzer@...hat.com>
Cc: "linux-block@...r.kernel.org" <linux-block@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"linux-scsi@...r.kernel.org" <linux-scsi@...r.kernel.org>,
"dm-devel@...hat.com" <dm-devel@...hat.com>,
Matias Bjorling <Matias.Bjorling@....com>
Subject: Re: [PATCH 1/4] block: add zone open, close and finish support
On 6/22/19 2:51 AM, Damien Le Moal wrote:
> Matias,
>
> Some comments inline below.
>
> On 2019/06/21 22:07, Matias Bjørling wrote:
>> From: Ajay Joshi <ajay.joshi@....com>
>>
>> Zoned block devices allows one to control zone transitions by using
>> explicit commands. The available transitions are:
>>
>> * Open zone: Transition a zone to open state.
>> * Close zone: Transition a zone to closed state.
>> * Finish zone: Transition a zone to full state.
>>
>> Allow kernel to issue these transitions by introducing
>> blkdev_zones_mgmt_op() and add three new request opcodes:
>>
>> * REQ_IO_ZONE_OPEN, REQ_IO_ZONE_CLOSE, and REQ_OP_ZONE_FINISH
>>
>> Allow user-space to issue the transitions through the following ioctls:
>>
>> * BLKOPENZONE, BLKCLOSEZONE, and BLKFINISHZONE.
>>
>> Signed-off-by: Ajay Joshi <ajay.joshi@....com>
>> Signed-off-by: Aravind Ramesh <aravind.ramesh@....com>
>> Signed-off-by: Matias Bjørling <matias.bjorling@....com>
>> ---
>> block/blk-core.c | 3 ++
>> block/blk-zoned.c | 51 ++++++++++++++++++++++---------
>> block/ioctl.c | 5 ++-
>> include/linux/blk_types.h | 27 +++++++++++++++--
>> include/linux/blkdev.h | 57 ++++++++++++++++++++++++++++++-----
>> include/uapi/linux/blkzoned.h | 17 +++++++++--
>> 6 files changed, 133 insertions(+), 27 deletions(-)
>>
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index 8340f69670d8..c0f0dbad548d 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -897,6 +897,9 @@ generic_make_request_checks(struct bio *bio)
>> goto not_supported;
>> break;
>> case REQ_OP_ZONE_RESET:
>> + case REQ_OP_ZONE_OPEN:
>> + case REQ_OP_ZONE_CLOSE:
>> + case REQ_OP_ZONE_FINISH:
>> if (!blk_queue_is_zoned(q))
>> goto not_supported;
>> break;
>> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
>> index ae7e91bd0618..d0c933593b93 100644
>> --- a/block/blk-zoned.c
>> +++ b/block/blk-zoned.c
>> @@ -201,20 +201,22 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>> EXPORT_SYMBOL_GPL(blkdev_report_zones);
>>
>> /**
>> - * blkdev_reset_zones - Reset zones write pointer
>> + * blkdev_zones_mgmt_op - Perform the specified operation on the zone(s)
>> * @bdev: Target block device
>> - * @sector: Start sector of the first zone to reset
>> - * @nr_sectors: Number of sectors, at least the length of one zone
>> + * @op: Operation to be performed on the zone(s)
>> + * @sector: Start sector of the first zone to operate on
>> + * @nr_sectors: Number of sectors, at least the length of one zone and
>> + * must be zone size aligned.
>> * @gfp_mask: Memory allocation flags (for bio_alloc)
>> *
>> * Description:
>> - * Reset the write pointer of the zones contained in the range
>> + * Perform the specified operation contained in the range
> Perform the specified operation over the sector range
>> * @sector..@...tor+@...sectors. Specifying the entire disk sector range
>> * is valid, but the specified range should not contain conventional zones.
>> */
>> -int blkdev_reset_zones(struct block_device *bdev,
>> - sector_t sector, sector_t nr_sectors,
>> - gfp_t gfp_mask)
>> +int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> {
>> struct request_queue *q = bdev_get_queue(bdev);
>> sector_t zone_sectors;
>> @@ -226,6 +228,9 @@ int blkdev_reset_zones(struct block_device *bdev,
>> if (!blk_queue_is_zoned(q))
>> return -EOPNOTSUPP;
>>
>> + if (!op_is_zone_mgmt_op(op))
>> + return -EOPNOTSUPP;
>
> EINVAL may be better here.
>
>> +
>> if (bdev_read_only(bdev))
>> return -EPERM;
>>
>> @@ -248,7 +253,7 @@ int blkdev_reset_zones(struct block_device *bdev,
>> bio = blk_next_bio(bio, 0, gfp_mask);
>> bio->bi_iter.bi_sector = sector;
>> bio_set_dev(bio, bdev);
>> - bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
>> + bio_set_op_attrs(bio, op, 0);
>>
>> sector += zone_sectors;
>>
>> @@ -264,7 +269,7 @@ int blkdev_reset_zones(struct block_device *bdev,
>>
>> return ret;
>> }
>> -EXPORT_SYMBOL_GPL(blkdev_reset_zones);
>> +EXPORT_SYMBOL_GPL(blkdev_zones_mgmt_op);
>>
>> /*
>> * BLKREPORTZONE ioctl processing.
>> @@ -329,15 +334,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
>> }
>>
>> /*
>> - * BLKRESETZONE ioctl processing.
>> + * Zone operation (open, close, finish or reset) ioctl processing.
>> * Called from blkdev_ioctl.
>> */
>> -int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
>> - unsigned int cmd, unsigned long arg)
>> +int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
>> + unsigned int cmd, unsigned long arg)
>> {
>> void __user *argp = (void __user *)arg;
>> struct request_queue *q;
>> struct blk_zone_range zrange;
>> + enum req_opf op;
>>
>> if (!argp)
>> return -EINVAL;
>> @@ -358,8 +364,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
>> if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
>> return -EFAULT;
>>
>> - return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
>> - GFP_KERNEL);
>> + switch (cmd) {
>> + case BLKRESETZONE:
>> + op = REQ_OP_ZONE_RESET;
>> + break;
>> + case BLKOPENZONE:
>> + op = REQ_OP_ZONE_OPEN;
>> + break;
>> + case BLKCLOSEZONE:
>> + op = REQ_OP_ZONE_CLOSE;
>> + break;
>> + case BLKFINISHZONE:
>> + op = REQ_OP_ZONE_FINISH;
>> + break;
>> + default:
>> + return -ENOTTY;
>> + }
>> +
>> + return blkdev_zones_mgmt_op(bdev, op, zrange.sector, zrange.nr_sectors,
>> + GFP_KERNEL);
>> }
>>
>> static inline unsigned long *blk_alloc_zone_bitmap(int node,
>> diff --git a/block/ioctl.c b/block/ioctl.c
>> index 15a0eb80ada9..df7fe54db158 100644
>> --- a/block/ioctl.c
>> +++ b/block/ioctl.c
>> @@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
>> case BLKREPORTZONE:
>> return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
>> case BLKRESETZONE:
>> - return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
>> + case BLKOPENZONE:
>> + case BLKCLOSEZONE:
>> + case BLKFINISHZONE:
>> + return blkdev_zones_mgmt_op_ioctl(bdev, mode, cmd, arg);
>> case BLKGETZONESZ:
>> return put_uint(arg, bdev_zone_sectors(bdev));
>> case BLKGETNRZONES:
>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>> index 95202f80676c..067ef9242275 100644
>> --- a/include/linux/blk_types.h
>> +++ b/include/linux/blk_types.h
>> @@ -284,13 +284,20 @@ enum req_opf {
>> REQ_OP_DISCARD = 3,
>> /* securely erase sectors */
>> REQ_OP_SECURE_ERASE = 5,
>> - /* reset a zone write pointer */
>> - REQ_OP_ZONE_RESET = 6,
>> /* write the same sector many times */
>> REQ_OP_WRITE_SAME = 7,
>> /* write the zero filled sector many times */
>> REQ_OP_WRITE_ZEROES = 9,
>>
>> + /* reset a zone write pointer */
>> + REQ_OP_ZONE_RESET = 16,
>> + /* Open zone(s) */
>> + REQ_OP_ZONE_OPEN = 17,
>> + /* Close zone(s) */
>> + REQ_OP_ZONE_CLOSE = 18,
>> + /* Finish zone(s) */
>> + REQ_OP_ZONE_FINISH = 19,
>> +
>> /* SCSI passthrough using struct scsi_request */
>> REQ_OP_SCSI_IN = 32,
>> REQ_OP_SCSI_OUT = 33,
>> @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
>> bio->bi_opf = op | op_flags;
>> }
>>
>> +/*
>> + * Check if the op is zoned operation.
> Check if op is a zone management operation.
>> + */
>> +static inline bool op_is_zone_mgmt_op(enum req_opf op)
>
> Similarly to "op_is_write" pattern, "op_is_zone_mgmt" may be a better name.
>
>> +{
>> + switch (op) {
>> + case REQ_OP_ZONE_RESET:
>> + case REQ_OP_ZONE_OPEN:
>> + case REQ_OP_ZONE_CLOSE:
>> + case REQ_OP_ZONE_FINISH:
>> + return true;
>> + default:
>> + return false;
>> + }
>> +}
>> +
>> static inline bool op_is_write(unsigned int op)
>> {
>> return (op & 1);
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 592669bcc536..943084f9dc9c 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -348,14 +348,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev);
>> extern int blkdev_report_zones(struct block_device *bdev,
>> sector_t sector, struct blk_zone *zones,
>> unsigned int *nr_zones, gfp_t gfp_mask);
>> -extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
>> - sector_t nr_sectors, gfp_t gfp_mask);
>> extern int blk_revalidate_disk_zones(struct gendisk *disk);
>>
>> extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
>> unsigned int cmd, unsigned long arg);
>> -extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
>> - unsigned int cmd, unsigned long arg);
>> +extern int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
>> + unsigned int cmd, unsigned long arg);
>> +extern int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask);
>
> To keep the grouping of declarations, may be declare this one where
> blkdev_reset_zones() was ?
>
>>
>> #else /* CONFIG_BLK_DEV_ZONED */
>>
>> @@ -376,15 +377,57 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
>> return -ENOTTY;
>> }
>>
>> -static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
>> - fmode_t mode, unsigned int cmd,
>> - unsigned long arg)
>> +static inline int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev,
>> + fmode_t mode, unsigned int cmd,
>> + unsigned long arg)
>> +{
>> + return -ENOTTY;
>> +}
>> +
>> +static inline int blkdev_zones_mgmt_op(struct block_device *bdev,
>> + enum req_opf op,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> {
>> return -ENOTTY;
>
> That should be -ENOTSUPP. This is not an ioctl. The ioctl call is above this one.
>
>> }
>>
>> #endif /* CONFIG_BLK_DEV_ZONED */
>>
>> +static inline int blkdev_reset_zones(struct block_device *bdev,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> +{
>> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_RESET,
>> + sector, nr_sectors, gfp_mask);
>> +}
>> +
>> +static inline int blkdev_open_zones(struct block_device *bdev,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> +{
>> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_OPEN,
>> + sector, nr_sectors, gfp_mask);
>> +}
>> +
>> +static inline int blkdev_close_zones(struct block_device *bdev,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> +{
>> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_CLOSE,
>> + sector, nr_sectors,
>> + gfp_mask);
>> +}
>> +
>> +static inline int blkdev_finish_zones(struct block_device *bdev,
>> + sector_t sector, sector_t nr_sectors,
>> + gfp_t gfp_mask)
>> +{
>> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_FINISH,
>> + sector, nr_sectors,
>> + gfp_mask);
>> +}
>> +
>> struct request_queue {
>> /*
>> * Together with queue_head for cacheline sharing
>> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
>> index 498eec813494..701e0692b8d3 100644
>> --- a/include/uapi/linux/blkzoned.h
>> +++ b/include/uapi/linux/blkzoned.h
>> @@ -120,9 +120,11 @@ struct blk_zone_report {
>> };
>>
>> /**
>> - * struct blk_zone_range - BLKRESETZONE ioctl request
>> - * @sector: starting sector of the first zone to issue reset write pointer
>> - * @nr_sectors: Total number of sectors of 1 or more zones to reset
>> + * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/
>> + * BLKCLOSEZONE/BLKFINISHZONE ioctl
>> + * request
>> + * @sector: starting sector of the first zone to operate on
>> + * @nr_sectors: Total number of sectors of all zones to operate on
>> */
>> struct blk_zone_range {
>> __u64 sector;
>> @@ -139,10 +141,19 @@ struct blk_zone_range {
>> * sector range. The sector range must be zone aligned.
>> * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
>> * @BLKGETNRZONES: Get the total number of zones of the device.
>> + * @BLKOPENZONE: Open the zones in the specified sector range. The
>> + * sector range must be zone aligned.
>> + * @BLKCLOSEZONE: Close the zones in the specified sector range. The
>> + * sector range must be zone aligned.
>> + * @BLKFINISHZONE: Finish the zones in the specified sector range. The
>> + * sector range must be zone aligned.
>> */
>> #define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report)
>> #define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range)
>> #define BLKGETZONESZ _IOR(0x12, 132, __u32)
>> #define BLKGETNRZONES _IOR(0x12, 133, __u32)
>> +#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range)
>> +#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
>> +#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
>>
>> #endif /* _UAPI_BLKZONED_H */
>>
>
>
Thanks Damien.
I was wondering if we should, instead of introducing three new ioctls,
make a v2 of the zone mgmt API?
Something like the following data structure being passed from user-space:
struct blk_zone_mgmt {
__u8 opcode;
__u8 resv[3];
__u32 flags;
__u64 sector;
__u64 nr_sectors;
__u64 resv1; /* to make room if we where do pass a data
data pointer through this API */
};
-Matias
Powered by blists - more mailing lists