lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <BYAPR04MB5816F7C8CA5B915DEEAF22D2E7E60@BYAPR04MB5816.namprd04.prod.outlook.com>
Date:   Sat, 22 Jun 2019 00:51:36 +0000
From:   Damien Le Moal <Damien.LeMoal@....com>
To:     Matias Bjørling <mb@...htnvm.io>,
        "axboe@...com" <axboe@...com>, "hch@....de" <hch@....de>,
        Chaitanya Kulkarni <Chaitanya.Kulkarni@....com>,
        Dmitry Fomichev <Dmitry.Fomichev@....com>,
        Ajay Joshi <Ajay.Joshi@....com>,
        Aravind Ramesh <Aravind.Ramesh@....com>,
        "martin.petersen@...cle.com" <martin.petersen@...cle.com>,
        "James.Bottomley@...senPartnership.com" 
        <James.Bottomley@...senPartnership.com>,
        "agk@...hat.com" <agk@...hat.com>,
        "snitzer@...hat.com" <snitzer@...hat.com>
CC:     "linux-block@...r.kernel.org" <linux-block@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "linux-scsi@...r.kernel.org" <linux-scsi@...r.kernel.org>,
        "dm-devel@...hat.com" <dm-devel@...hat.com>,
        Matias Bjorling <Matias.Bjorling@....com>
Subject: Re: [PATCH 1/4] block: add zone open, close and finish support

Matias,

Some comments inline below.

On 2019/06/21 22:07, Matias Bjørling wrote:
> From: Ajay Joshi <ajay.joshi@....com>
> 
> Zoned block devices allows one to control zone transitions by using
> explicit commands. The available transitions are:
> 
>   * Open zone: Transition a zone to open state.
>   * Close zone: Transition a zone to closed state.
>   * Finish zone: Transition a zone to full state.
> 
> Allow kernel to issue these transitions by introducing
> blkdev_zones_mgmt_op() and add three new request opcodes:
> 
>   * REQ_IO_ZONE_OPEN, REQ_IO_ZONE_CLOSE, and REQ_OP_ZONE_FINISH
> 
> Allow user-space to issue the transitions through the following ioctls:
> 
>   * BLKOPENZONE, BLKCLOSEZONE, and BLKFINISHZONE.
> 
> Signed-off-by: Ajay Joshi <ajay.joshi@....com>
> Signed-off-by: Aravind Ramesh <aravind.ramesh@....com>
> Signed-off-by: Matias Bjørling <matias.bjorling@....com>
> ---
>  block/blk-core.c              |  3 ++
>  block/blk-zoned.c             | 51 ++++++++++++++++++++++---------
>  block/ioctl.c                 |  5 ++-
>  include/linux/blk_types.h     | 27 +++++++++++++++--
>  include/linux/blkdev.h        | 57 ++++++++++++++++++++++++++++++-----
>  include/uapi/linux/blkzoned.h | 17 +++++++++--
>  6 files changed, 133 insertions(+), 27 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 8340f69670d8..c0f0dbad548d 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -897,6 +897,9 @@ generic_make_request_checks(struct bio *bio)
>  			goto not_supported;
>  		break;
>  	case REQ_OP_ZONE_RESET:
> +	case REQ_OP_ZONE_OPEN:
> +	case REQ_OP_ZONE_CLOSE:
> +	case REQ_OP_ZONE_FINISH:
>  		if (!blk_queue_is_zoned(q))
>  			goto not_supported;
>  		break;
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index ae7e91bd0618..d0c933593b93 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -201,20 +201,22 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
>  EXPORT_SYMBOL_GPL(blkdev_report_zones);
>  
>  /**
> - * blkdev_reset_zones - Reset zones write pointer
> + * blkdev_zones_mgmt_op - Perform the specified operation on the zone(s)
>   * @bdev:	Target block device
> - * @sector:	Start sector of the first zone to reset
> - * @nr_sectors:	Number of sectors, at least the length of one zone
> + * @op:		Operation to be performed on the zone(s)
> + * @sector:	Start sector of the first zone to operate on
> + * @nr_sectors:	Number of sectors, at least the length of one zone and
> + *              must be zone size aligned.
>   * @gfp_mask:	Memory allocation flags (for bio_alloc)
>   *
>   * Description:
> - *    Reset the write pointer of the zones contained in the range
> + *    Perform the specified operation contained in the range
	Perform the specified operation over the sector range
>   *    @sector..@...tor+@...sectors. Specifying the entire disk sector range
>   *    is valid, but the specified range should not contain conventional zones.
>   */
> -int blkdev_reset_zones(struct block_device *bdev,
> -		       sector_t sector, sector_t nr_sectors,
> -		       gfp_t gfp_mask)
> +int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
> +			 sector_t sector, sector_t nr_sectors,
> +			 gfp_t gfp_mask)
>  {
>  	struct request_queue *q = bdev_get_queue(bdev);
>  	sector_t zone_sectors;
> @@ -226,6 +228,9 @@ int blkdev_reset_zones(struct block_device *bdev,
>  	if (!blk_queue_is_zoned(q))
>  		return -EOPNOTSUPP;
>  
> +	if (!op_is_zone_mgmt_op(op))
> +		return -EOPNOTSUPP;

EINVAL may be better here.

> +
>  	if (bdev_read_only(bdev))
>  		return -EPERM;
>  
> @@ -248,7 +253,7 @@ int blkdev_reset_zones(struct block_device *bdev,
>  		bio = blk_next_bio(bio, 0, gfp_mask);
>  		bio->bi_iter.bi_sector = sector;
>  		bio_set_dev(bio, bdev);
> -		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
> +		bio_set_op_attrs(bio, op, 0);
>  
>  		sector += zone_sectors;
>  
> @@ -264,7 +269,7 @@ int blkdev_reset_zones(struct block_device *bdev,
>  
>  	return ret;
>  }
> -EXPORT_SYMBOL_GPL(blkdev_reset_zones);
> +EXPORT_SYMBOL_GPL(blkdev_zones_mgmt_op);
>  
>  /*
>   * BLKREPORTZONE ioctl processing.
> @@ -329,15 +334,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
>  }
>  
>  /*
> - * BLKRESETZONE ioctl processing.
> + * Zone operation (open, close, finish or reset) ioctl processing.
>   * Called from blkdev_ioctl.
>   */
> -int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
> -			     unsigned int cmd, unsigned long arg)
> +int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
> +				unsigned int cmd, unsigned long arg)
>  {
>  	void __user *argp = (void __user *)arg;
>  	struct request_queue *q;
>  	struct blk_zone_range zrange;
> +	enum req_opf op;
>  
>  	if (!argp)
>  		return -EINVAL;
> @@ -358,8 +364,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
>  	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
>  		return -EFAULT;
>  
> -	return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
> -				  GFP_KERNEL);
> +	switch (cmd) {
> +	case BLKRESETZONE:
> +		op = REQ_OP_ZONE_RESET;
> +		break;
> +	case BLKOPENZONE:
> +		op = REQ_OP_ZONE_OPEN;
> +		break;
> +	case BLKCLOSEZONE:
> +		op = REQ_OP_ZONE_CLOSE;
> +		break;
> +	case BLKFINISHZONE:
> +		op = REQ_OP_ZONE_FINISH;
> +		break;
> +	default:
> +		return -ENOTTY;
> +	}
> +
> +	return blkdev_zones_mgmt_op(bdev, op, zrange.sector, zrange.nr_sectors,
> +				    GFP_KERNEL);
>  }
>  
>  static inline unsigned long *blk_alloc_zone_bitmap(int node,
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 15a0eb80ada9..df7fe54db158 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
>  	case BLKREPORTZONE:
>  		return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
>  	case BLKRESETZONE:
> -		return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
> +	case BLKOPENZONE:
> +	case BLKCLOSEZONE:
> +	case BLKFINISHZONE:
> +		return blkdev_zones_mgmt_op_ioctl(bdev, mode, cmd, arg);
>  	case BLKGETZONESZ:
>  		return put_uint(arg, bdev_zone_sectors(bdev));
>  	case BLKGETNRZONES:
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 95202f80676c..067ef9242275 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -284,13 +284,20 @@ enum req_opf {
>  	REQ_OP_DISCARD		= 3,
>  	/* securely erase sectors */
>  	REQ_OP_SECURE_ERASE	= 5,
> -	/* reset a zone write pointer */
> -	REQ_OP_ZONE_RESET	= 6,
>  	/* write the same sector many times */
>  	REQ_OP_WRITE_SAME	= 7,
>  	/* write the zero filled sector many times */
>  	REQ_OP_WRITE_ZEROES	= 9,
>  
> +	/* reset a zone write pointer */
> +	REQ_OP_ZONE_RESET	= 16,
> +	/* Open zone(s) */
> +	REQ_OP_ZONE_OPEN	= 17,
> +	/* Close zone(s) */
> +	REQ_OP_ZONE_CLOSE	= 18,
> +	/* Finish zone(s) */
> +	REQ_OP_ZONE_FINISH	= 19,
> +
>  	/* SCSI passthrough using struct scsi_request */
>  	REQ_OP_SCSI_IN		= 32,
>  	REQ_OP_SCSI_OUT		= 33,
> @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
>  	bio->bi_opf = op | op_flags;
>  }
>  
> +/*
> + * Check if the op is zoned operation.
      Check if op is a zone management operation.
> + */
> +static inline bool op_is_zone_mgmt_op(enum req_opf op)

Similarly to "op_is_write" pattern, "op_is_zone_mgmt" may be a better name.

> +{
> +	switch (op) {
> +	case REQ_OP_ZONE_RESET:
> +	case REQ_OP_ZONE_OPEN:
> +	case REQ_OP_ZONE_CLOSE:
> +	case REQ_OP_ZONE_FINISH:
> +		return true;
> +	default:
> +		return false;
> +	}
> +}
> +
>  static inline bool op_is_write(unsigned int op)
>  {
>  	return (op & 1);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 592669bcc536..943084f9dc9c 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -348,14 +348,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev);
>  extern int blkdev_report_zones(struct block_device *bdev,
>  			       sector_t sector, struct blk_zone *zones,
>  			       unsigned int *nr_zones, gfp_t gfp_mask);
> -extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
> -			      sector_t nr_sectors, gfp_t gfp_mask);
>  extern int blk_revalidate_disk_zones(struct gendisk *disk);
>  
>  extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
>  				     unsigned int cmd, unsigned long arg);
> -extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
> -				    unsigned int cmd, unsigned long arg);
> +extern int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
> +					unsigned int cmd, unsigned long arg);
> +extern int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
> +				sector_t sector, sector_t nr_sectors,
> +				gfp_t gfp_mask);

To keep the grouping of declarations, may be declare this one where
blkdev_reset_zones() was ?

>  
>  #else /* CONFIG_BLK_DEV_ZONED */
>  
> @@ -376,15 +377,57 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
>  	return -ENOTTY;
>  }
>  
> -static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
> -					   fmode_t mode, unsigned int cmd,
> -					   unsigned long arg)
> +static inline int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev,
> +					     fmode_t mode, unsigned int cmd,
> +					     unsigned long arg)
> +{
> +	return -ENOTTY;
> +}
> +
> +static inline int blkdev_zones_mgmt_op(struct block_device *bdev,
> +				       enum req_opf op,
> +				       sector_t sector, sector_t nr_sectors,
> +				       gfp_t gfp_mask)
>  {
>  	return -ENOTTY;

That should be -ENOTSUPP. This is not an ioctl. The ioctl call is above this one.

>  }
>  
>  #endif /* CONFIG_BLK_DEV_ZONED */
>  
> +static inline int blkdev_reset_zones(struct block_device *bdev,
> +				     sector_t sector, sector_t nr_sectors,
> +				     gfp_t gfp_mask)
> +{
> +	return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_RESET,
> +				    sector, nr_sectors, gfp_mask);
> +}
> +
> +static inline int blkdev_open_zones(struct block_device *bdev,
> +				    sector_t sector, sector_t nr_sectors,
> +				    gfp_t gfp_mask)
> +{
> +	return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_OPEN,
> +				    sector, nr_sectors, gfp_mask);
> +}
> +
> +static inline int blkdev_close_zones(struct block_device *bdev,
> +				     sector_t sector, sector_t nr_sectors,
> +				     gfp_t gfp_mask)
> +{
> +	return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_CLOSE,
> +				    sector, nr_sectors,
> +				    gfp_mask);
> +}
> +
> +static inline int blkdev_finish_zones(struct block_device *bdev,
> +				      sector_t sector, sector_t nr_sectors,
> +				      gfp_t gfp_mask)
> +{
> +	return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_FINISH,
> +				    sector, nr_sectors,
> +				    gfp_mask);
> +}
> +
>  struct request_queue {
>  	/*
>  	 * Together with queue_head for cacheline sharing
> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
> index 498eec813494..701e0692b8d3 100644
> --- a/include/uapi/linux/blkzoned.h
> +++ b/include/uapi/linux/blkzoned.h
> @@ -120,9 +120,11 @@ struct blk_zone_report {
>  };
>  
>  /**
> - * struct blk_zone_range - BLKRESETZONE ioctl request
> - * @sector: starting sector of the first zone to issue reset write pointer
> - * @nr_sectors: Total number of sectors of 1 or more zones to reset
> + * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/
> + *			   BLKCLOSEZONE/BLKFINISHZONE ioctl
> + *			   request
> + * @sector: starting sector of the first zone to operate on
> + * @nr_sectors: Total number of sectors of all zones to operate on
>   */
>  struct blk_zone_range {
>  	__u64		sector;
> @@ -139,10 +141,19 @@ struct blk_zone_range {
>   *                sector range. The sector range must be zone aligned.
>   * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
>   * @BLKGETNRZONES: Get the total number of zones of the device.
> + * @BLKOPENZONE: Open the zones in the specified sector range. The
> + *               sector range must be zone aligned.
> + * @BLKCLOSEZONE: Close the zones in the specified sector range. The
> + *                sector range must be zone aligned.
> + * @BLKFINISHZONE: Finish the zones in the specified sector range. The
> + *                 sector range must be zone aligned.
>   */
>  #define BLKREPORTZONE	_IOWR(0x12, 130, struct blk_zone_report)
>  #define BLKRESETZONE	_IOW(0x12, 131, struct blk_zone_range)
>  #define BLKGETZONESZ	_IOR(0x12, 132, __u32)
>  #define BLKGETNRZONES	_IOR(0x12, 133, __u32)
> +#define BLKOPENZONE	_IOW(0x12, 134, struct blk_zone_range)
> +#define BLKCLOSEZONE	_IOW(0x12, 135, struct blk_zone_range)
> +#define BLKFINISHZONE	_IOW(0x12, 136, struct blk_zone_range)
>  
>  #endif /* _UAPI_BLKZONED_H */
> 


-- 
Damien Le Moal
Western Digital Research

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ