lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9d864388-00e8-d5e4-eef7-2f5d4d8f738d@lightnvm.io>
Date:   Sat, 22 Apr 2017 11:22:29 +0200
From:   Matias Bjørling <mb@...htnvm.io>
To:     Javier González <jg@...htnvm.io>
Cc:     linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
        Javier González <javier@...xlabs.com>
Subject: Re: [PATCH 5/5] lightnvm: pblk: fix erase counters on error fail

On 04/22/2017 01:32 AM, Javier González wrote:
> When block erases fail, these blocks are marked bad. The number of valid
> blocks in the line was not updated, which could cause an infinite loop
> on the erase path.
>
> Fix this atomic counter and, in order to avoid taking an irq lock on the
> interrupt context, make the erase counters atomic too.

I can't find where the counters are used in irq context. Can you
point me in the right direction? I'd prefer that these counters go
under the existing line_lock.

>
> Also, in the case that a significant number of blocks become bad in a
> line, the result is the double shared metadata buffer (emeta) to stop
> the pipeline until all metadata is flushed to the media. Increase the
> number of metadata lines from 2 to 4 to avoid this case.

How does moving to 4 lines solve this case? The way I read it, this
only postpones when the problem occurs.

>
> Fixes: a4bd217b4326 "lightnvm: physical block device (pblk) target"
>
> Signed-off-by: Javier González <javier@...xlabs.com>
> ---
>  drivers/lightnvm/pblk-core.c  | 28 +++++++++++++++++++---------
>  drivers/lightnvm/pblk-gc.c    |  2 +-
>  drivers/lightnvm/pblk-init.c  |  9 ++++++---
>  drivers/lightnvm/pblk-map.c   |  4 ++--
>  drivers/lightnvm/pblk-rl.c    |  6 ++++--
>  drivers/lightnvm/pblk-write.c |  4 ++--
>  drivers/lightnvm/pblk.h       |  6 +++---
>  7 files changed, 37 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index ac3742b..5e44768 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -29,6 +29,7 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
>  	pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
>  	atomic_long_inc(&pblk->erase_failed);
>
> +	atomic_dec(&line->blk_in_line);
>  	if (test_and_set_bit(pos, line->blk_bitmap))
>  		pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
>  							line->id, pos);
> @@ -832,21 +833,28 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
>  	struct ppa_addr ppa;
>  	int bit = -1;
>
> -	/* Erase one block at the time and only erase good blocks */
> -	while ((bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
> -						bit + 1)) < lm->blk_per_line) {
> +	/* Erase only good blocks, one at a time */
> +	do {
> +		spin_lock(&line->lock);
> +		bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
> +								bit + 1);
> +		if (bit >= lm->blk_per_line) {
> +			spin_unlock(&line->lock);
> +			break;
> +		}
> +
>  		ppa = pblk->luns[bit].bppa; /* set ch and lun */
>  		ppa.g.blk = line->id;
>
> -		/* If the erase fails, the block is bad and should be marked */
> -		line->left_eblks--;
> +		atomic_dec(&line->left_eblks);
>  		WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
> +		spin_unlock(&line->lock);
>
>  		if (pblk_blk_erase_sync(pblk, ppa)) {
>  			pr_err("pblk: failed to erase line %d\n", line->id);
>  			return -ENOMEM;
>  		}
> -	}
> +	} while (1);
>
>  	return 0;
>  }
> @@ -1007,6 +1015,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
>  static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
>  {
>  	struct pblk_line_meta *lm = &pblk->lm;
> +	int blk_in_line = atomic_read(&line->blk_in_line);
>
>  	line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
>  	if (!line->map_bitmap)
> @@ -1030,12 +1039,13 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
>  		return -EINTR;
>  	}
>  	line->state = PBLK_LINESTATE_OPEN;
> +
> +	atomic_set(&line->left_eblks, blk_in_line);
> +	atomic_set(&line->left_seblks, blk_in_line);
>  	spin_unlock(&line->lock);
>
>  	/* Bad blocks do not need to be erased */
>  	bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
> -	line->left_eblks = line->blk_in_line;
> -	atomic_set(&line->left_seblks, line->left_eblks);
>
>  	kref_init(&line->ref);
>
> @@ -1231,7 +1241,7 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
>  	left_seblks = atomic_read(&new->left_seblks);
>  	if (left_seblks) {
>  		/* If line is not fully erased, erase it */
> -		if (new->left_eblks) {
> +		if (atomic_read(&new->left_eblks)) {
>  			if (pblk_line_erase(pblk, new))
>  				return NULL;
>  		} else {
> diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
> index f173fd4..eaf479c 100644
> --- a/drivers/lightnvm/pblk-gc.c
> +++ b/drivers/lightnvm/pblk-gc.c
> @@ -332,7 +332,7 @@ static void pblk_gc_run(struct pblk *pblk)
>  		}
>
>  		line = list_first_entry(group_list, struct pblk_line, list);
> -		nr_blocks_free += line->blk_in_line;
> +		nr_blocks_free += atomic_read(&line->blk_in_line);
>
>  		spin_lock(&line->lock);
>  		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
> index 3996e4b..15b2787 100644
> --- a/drivers/lightnvm/pblk-init.c
> +++ b/drivers/lightnvm/pblk-init.c
> @@ -678,6 +678,8 @@ static int pblk_lines_init(struct pblk *pblk)
>
>  	nr_free_blks = 0;
>  	for (i = 0; i < l_mg->nr_lines; i++) {
> +		int blk_in_line;
> +
>  		line = &pblk->lines[i];
>
>  		line->pblk = pblk;
> @@ -693,14 +695,15 @@ static int pblk_lines_init(struct pblk *pblk)
>  			goto fail_free_lines;
>  		}
>
> -		line->blk_in_line = lm->blk_per_line - nr_bad_blks;
> -		if (line->blk_in_line < lm->min_blk_line) {
> +		blk_in_line = lm->blk_per_line - nr_bad_blks;
> +		if (blk_in_line < lm->min_blk_line) {
>  			line->state = PBLK_LINESTATE_BAD;
>  			list_add_tail(&line->list, &l_mg->bad_list);
>  			continue;
>  		}
>
> -		nr_free_blks += line->blk_in_line;
> +		nr_free_blks += blk_in_line;
> +		atomic_set(&line->blk_in_line, blk_in_line);
>
>  		l_mg->nr_free_lines++;
>  		list_add_tail(&line->list, &l_mg->free_list);
> diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
> index 3f8bab4..17c1695 100644
> --- a/drivers/lightnvm/pblk-map.c
> +++ b/drivers/lightnvm/pblk-map.c
> @@ -110,7 +110,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
>  				continue;
>
>  			set_bit(erase_lun, e_line->erase_bitmap);
> -			e_line->left_eblks--;
> +			atomic_dec(&e_line->left_eblks);
>  			*erase_ppa = rqd->ppa_list[i];
>  			erase_ppa->g.blk = e_line->id;
>
> @@ -129,7 +129,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
>  			return;
>
>  		set_bit(i, e_line->erase_bitmap);
> -		e_line->left_eblks--;
> +		atomic_dec(&e_line->left_eblks);
>  		*erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
>  		erase_ppa->g.blk = e_line->id;
>  	}
> diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
> index 4042162..ab7cbb1 100644
> --- a/drivers/lightnvm/pblk-rl.c
> +++ b/drivers/lightnvm/pblk-rl.c
> @@ -107,9 +107,10 @@ void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
>  void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
>  {
>  	struct pblk *pblk = container_of(rl, struct pblk, rl);
> +	int blk_in_line = atomic_read(&line->blk_in_line);
>  	int ret;
>
> -	atomic_add(line->blk_in_line, &rl->free_blocks);
> +	atomic_add(blk_in_line, &rl->free_blocks);
>  	/* Rates will not change that often - no need to lock update */
>  	ret = pblk_rl_update_rates(rl, rl->rb_budget);
>
> @@ -122,9 +123,10 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
>  void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
>  {
>  	struct pblk *pblk = container_of(rl, struct pblk, rl);
> +	int blk_in_line = atomic_read(&line->blk_in_line);
>  	int ret;
>
> -	atomic_sub(line->blk_in_line, &rl->free_blocks);
> +	atomic_sub(blk_in_line, &rl->free_blocks);
>
>  	/* Rates will not change that often - no need to lock update */
>  	ret = pblk_rl_update_rates(rl, rl->rb_budget);
> diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
> index a896190..aef6fd7 100644
> --- a/drivers/lightnvm/pblk-write.c
> +++ b/drivers/lightnvm/pblk-write.c
> @@ -244,7 +244,7 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
>  	}
>
>  	ppa_set_empty(&erase_ppa);
> -	if (likely(!e_line || !e_line->left_eblks))
> +	if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
>  		pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
>  	else
>  		pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
> @@ -257,7 +257,7 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
>  			struct nvm_geo *geo = &dev->geo;
>  			int bit;
>
> -			e_line->left_eblks++;
> +			atomic_inc(&e_line->left_eblks);
>  			bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
>  			WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
>  			up(&pblk->erase_sem);
> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> index c82120c..02dd18f 100644
> --- a/drivers/lightnvm/pblk.h
> +++ b/drivers/lightnvm/pblk.h
> @@ -363,14 +363,14 @@ struct pblk_line {
>
>  	unsigned int sec_in_line;	/* Number of usable secs in line */
>
> -	unsigned int blk_in_line;	/* Number of good blocks in line */
> +	atomic_t blk_in_line;		/* Number of good blocks in line */
>  	unsigned long *blk_bitmap;	/* Bitmap for valid/invalid blocks */
>  	unsigned long *erase_bitmap;	/* Bitmap for erased blocks */
>
>  	unsigned long *map_bitmap;	/* Bitmap for mapped sectors in line */
>  	unsigned long *invalid_bitmap;	/* Bitmap for invalid sectors in line */
>
> -	int left_eblks;			/* Blocks left for erasing */
> +	atomic_t left_eblks;		/* Blocks left for erasing */
>  	atomic_t left_seblks;		/* Blocks left for sync erasing */
>
>  	int left_msecs;			/* Sectors left for mapping */
> @@ -383,7 +383,7 @@ struct pblk_line {
>  	spinlock_t lock;		/* Necessary for invalid_bitmap only */
>  };
>
> -#define PBLK_DATA_LINES 2
> +#define PBLK_DATA_LINES 4

Why this change? I'd like to keep new features for 4.13 and take only
bugfixes for 4.12.

>
>  enum{
>  	PBLK_KMALLOC_META = 1,
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ