Message-ID: <EED79EDF-8391-4414-8DB6-79BF20B06F51@cnexlabs.com>
Date:   Fri, 31 Aug 2018 11:50:05 +0000
From:   Javier Gonzalez <javier@...xlabs.com>
To:     Matias Bjørling <mb@...htnvm.io>
CC:     "Konopko, Igor J" <igor.j.konopko@...el.com>,
        "marcin.dziegielewski@...el.com" <marcin.dziegielewski@...el.com>,
        Hans Holmberg <hans.holmberg@...xlabs.com>,
        Heiner Litz <hlitz@...c.edu>,
        Young Tack Tack Jin <youngtack.jin@...cuitblvd.com>,
        "linux-block@...r.kernel.org" <linux-block@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH] lightnvm: move bad block and chunk state logic to core

> On 17 Aug 2018, at 13.14, Matias Bjørling <mb@...htnvm.io> wrote:
> 
> pblk implements two data paths for recovery line state. One for 1.2
> and another for 2.0, instead of having pblk implement these, combine
> them in the core to reduce complexity and make available to other
> targets.
> 
> The new interface will adhere to the 2.0 chunk definition,
> including managing open chunks with an active write pointer. To provide
> this interface, a 1.2 device recovers the state of the chunks by
> manually detecting whether a chunk is free, open, closed, or offline,
> and if open, scanning the flash pages sequentially to find the next
> writable page. This process takes ~10 seconds on average on a device
> with 64 dies, 1024 blocks, and a 60us read access time. The process
> could be parallelized, but this is left out for maintenance simplicity,
> as the 1.2 specification is deprecated. For 2.0 devices, the logic is
> maintained internally in the drive and retrieved through the 2.0
> interface.
> 
> Signed-off-by: Matias Bjørling <mb@...htnvm.io>
> ---
> drivers/lightnvm/core.c      | 310 +++++++++++++++++++++++++++++++++++--------
> drivers/lightnvm/pblk-core.c |   6 +-
> drivers/lightnvm/pblk-init.c | 116 +---------------
> drivers/lightnvm/pblk.h      |   2 +-
> drivers/nvme/host/lightnvm.c |   4 +-
> include/linux/lightnvm.h     |  15 +--
> 6 files changed, 266 insertions(+), 187 deletions(-)
> 
> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
> index 964352720a03..003fc073f496 100644
> --- a/drivers/lightnvm/core.c
> +++ b/drivers/lightnvm/core.c
> @@ -717,46 +717,6 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
> 	nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
> }
> 
> -int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta,
> -		struct ppa_addr ppa, int nchks)
> -{
> -	struct nvm_dev *dev = tgt_dev->parent;
> -
> -	nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
> -
> -	return dev->ops->get_chk_meta(tgt_dev->parent, meta,
> -						(sector_t)ppa.ppa, nchks);
> -}
> -EXPORT_SYMBOL(nvm_get_chunk_meta);
> -
> -int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
> -		       int nr_ppas, int type)
> -{
> -	struct nvm_dev *dev = tgt_dev->parent;
> -	struct nvm_rq rqd;
> -	int ret;
> -
> -	if (nr_ppas > NVM_MAX_VLBA) {
> -		pr_err("nvm: unable to update all blocks atomically\n");
> -		return -EINVAL;
> -	}
> -
> -	memset(&rqd, 0, sizeof(struct nvm_rq));
> -
> -	nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas);
> -	nvm_rq_tgt_to_dev(tgt_dev, &rqd);
> -
> -	ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
> -	nvm_free_rqd_ppalist(tgt_dev, &rqd);
> -	if (ret) {
> -		pr_err("nvm: failed bb mark\n");
> -		return -EINVAL;
> -	}
> -
> -	return 0;
> -}
> -EXPORT_SYMBOL(nvm_set_tgt_bb_tbl);
> -
> static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
> {
> 	int flags = 0;
> @@ -830,27 +790,159 @@ void nvm_end_io(struct nvm_rq *rqd)
> }
> EXPORT_SYMBOL(nvm_end_io);
> 
> +static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd)
> +{
> +	if (!dev->ops->submit_io_sync)
> +		return -ENODEV;
> +
> +	rqd->flags = nvm_set_flags(&dev->geo, rqd);
> +
> +	return dev->ops->submit_io_sync(dev, rqd);
> +}
> +
> +static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa)
> +{
> +	struct nvm_rq rqd = { NULL };
> +	struct bio bio;
> +	struct bio_vec bio_vec;
> +	struct page *page;
> +	int ret;
> +
> +	page = alloc_page(GFP_KERNEL);
> +	if (!page)
> +		return -ENOMEM;
> +
> +	bio_init(&bio, &bio_vec, 1);
> +	bio_add_page(&bio, page, PAGE_SIZE, 0);
> +	bio_set_op_attrs(&bio, REQ_OP_READ, 0);
> +
> +	rqd.bio = &bio;
> +	rqd.opcode = NVM_OP_PREAD;
> +	rqd.is_seq = 1;
> +	rqd.nr_ppas = 1;
> +	rqd.ppa_addr = generic_to_dev_addr(dev, ppa);
> +
> +	ret = nvm_submit_io_sync_raw(dev, &rqd);
> +	__free_page(page);
> +
> +	if (ret)
> +		return ret;
> +
> +	return rqd.error;
> +}
> +
> /*
> - * folds a bad block list from its plane representation to its virtual
> - * block representation. The fold is done in place and reduced size is
> - * returned.
> + * Scans a 1.2 chunk's first and last page to determine its state.
> + * If the chunk is found to be open, also scan it to update the write
> + * pointer.
> + */
> +static int nvm_bb_scan_chunk(struct nvm_dev *dev, struct ppa_addr ppa,
> +			     struct nvm_chk_meta *meta)
> +{
> +	struct nvm_geo *geo = &dev->geo;
> +	int ret, pg, pl;
> +
> +	/* sense first page */
> +	ret = nvm_bb_chunk_sense(dev, ppa);
> +	if (ret < 0) /* io error */
> +		return ret;
> +	else if (ret == 0) /* valid data */
> +		meta->state = NVM_CHK_ST_OPEN;
> +	else if (ret > 0) {
> +		/*
> +		 * An empty page means the chunk is free. CRC/ECC
> +		 * failures indicate an open chunk that must be
> +		 * scanned. Any other response is an actual io error.
> +		 */
> +		switch (ret) {
> +		case NVM_RSP_ERR_EMPTYPAGE:
> +			meta->state = NVM_CHK_ST_FREE;
> +			return 0;
> +		case NVM_RSP_ERR_FAILCRC:
> +		case NVM_RSP_ERR_FAILECC:
> +		case NVM_RSP_WARN_HIGHECC:
> +			meta->state = NVM_CHK_ST_OPEN;
> +			goto scan;
> +		default:
> +			return -ret; /* other io error */
> +		}
> +	}
> +
> +	/* sense last page */
> +	ppa.g.pg = geo->num_pg - 1;
> +	ppa.g.pl = geo->num_pln - 1;
> +
> +	ret = nvm_bb_chunk_sense(dev, ppa);
> +	if (ret < 0) /* io error */
> +		return ret;
> +	else if (ret == 0) { /* Chunk fully written */
> +		meta->state = NVM_CHK_ST_CLOSED;
> +		meta->wp = geo->clba;
> +		return 0;
> +	} else if (ret > 0) {
> +		switch (ret) {
> +		case NVM_RSP_ERR_EMPTYPAGE:
> +		case NVM_RSP_ERR_FAILCRC:
> +		case NVM_RSP_ERR_FAILECC:
> +		case NVM_RSP_WARN_HIGHECC:
> +			meta->state = NVM_CHK_ST_OPEN;
> +			break;
> +		default:
> +			return -ret; /* other io error */
> +		}
> +	}
> +
> +scan:
> +	/*
> +	 * The chunk is open; scan it sequentially to update the write
> +	 * pointer. We assume that targets write data across all planes
> +	 * before moving to the next page.
> +	 */
> +	for (pg = 0; pg < geo->num_pg; pg++) {
> +		for (pl = 0; pl < geo->num_pln; pl++) {
> +			ppa.g.pg = pg;
> +			ppa.g.pl = pl;
> +
> +			ret = nvm_bb_chunk_sense(dev, ppa);
> +			if (ret < 0) /* io error */
> +				return ret;
> +			else if (ret == 0) {
> +				meta->wp += geo->ws_min;
> +			} else if (ret > 0) {
> +				switch (ret) {
> +				case NVM_RSP_ERR_EMPTYPAGE:
> +					return 0;
> +				case NVM_RSP_ERR_FAILCRC:
> +				case NVM_RSP_ERR_FAILECC:
> +				case NVM_RSP_WARN_HIGHECC:
> +					meta->wp += geo->ws_min;
> +					break;
> +				default:
> +					return -ret; /* other io error */
> +				}
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * folds a bad block list from its plane representation to its
> + * chunk representation.
>  *
> - * If any of the planes status are bad or grown bad block, the virtual block
> - * is marked bad. If not bad, the first plane state acts as the block state.
> + * If the status of any plane is bad or grown bad, the chunk is marked
> + * offline. Otherwise, the first plane state acts as the chunk state.
>  */
> -int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
> +static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa,
> +			   u8 *blks, int nr_blks, struct nvm_chk_meta *meta)
> {
> 	struct nvm_geo *geo = &dev->geo;
> -	int blk, offset, pl, blktype;
> -
> -	if (nr_blks != geo->num_chk * geo->pln_mode)
> -		return -EINVAL;
> +	int ret, blk, pl, offset, blktype;
> 
> 	for (blk = 0; blk < geo->num_chk; blk++) {
> 		offset = blk * geo->pln_mode;
> 		blktype = blks[offset];
> 
> -		/* Bad blocks on any planes take precedence over other types */
> 		for (pl = 0; pl < geo->pln_mode; pl++) {
> 			if (blks[offset + pl] &
> 					(NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
> @@ -859,23 +951,125 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
> 			}
> 		}
> 
> -		blks[blk] = blktype;
> +		meta->wp = 0;
> +		meta->type = NVM_CHK_TP_W_SEQ;
> +		meta->wi = 0;
> +		meta->slba = generic_to_dev_addr(dev, ppa).ppa;
> +		meta->cnlb = dev->geo.clba;
> +
> +		if (blktype == NVM_BLK_T_FREE) {
> +			ppa.a.blk = blk;

This line should be moved above the meta->slba assignment. Otherwise,
the slba address does not account for the block.

Also, since this is 1.2-specific, can you use g instead of a?
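
For reference, this is roughly what I have in mind (an untested sketch
against the patch as posted, using the 1.2 address format):

	for (blk = 0; blk < geo->num_chk; blk++) {
		...

		/* set the block up front so slba accounts for it */
		ppa.g.blk = blk;

		meta->wp = 0;
		meta->type = NVM_CHK_TP_W_SEQ;
		meta->wi = 0;
		meta->slba = generic_to_dev_addr(dev, ppa).ppa;
		meta->cnlb = dev->geo.clba;

		if (blktype == NVM_BLK_T_FREE) {
			...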

I'll send a patch later today implementing chunk metadata retrieval on
erase, which relies on this. I'll include this fix as a small commit in
that series; feel free to fold it into your patch.

Javier
