[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <52CBD83E-784F-41E0-BB1E-CE12A93D015E@cnexlabs.com>
Date: Fri, 20 Apr 2018 18:49:39 +0000
From: Javier Gonzalez <javier@...xlabs.com>
To: Hans Holmberg <hans.ml.holmberg@...tronix.com>
CC: Matias Bjørling <mb@...htnvm.io>,
"linux-block@...r.kernel.org" <linux-block@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Hans Holmberg <hans.holmberg@...xlabs.com>
Subject: Re: [PATCH 2/3] lightnvm: pblk: garbage collect lines with failed
writes
> On 19 Apr 2018, at 09.39, Hans Holmberg <hans.ml.holmberg@...tronix.com> wrote:
>
> From: Hans Holmberg <hans.holmberg@...xlabs.com>
>
> Write failures should not happen under normal circumstances,
> so in order to bring the chunk back into a known state as soon
> as possible, evacuate all the valid data out of the line and let the
> fw judge if the block can be written to in the next reset cycle.
>
> Do this by introducing a new gc list for lines with failed writes,
> and ensure that the rate limiter allocates a small portion of
> the write bandwidth to get the job done.
>
> The lba list is saved in memory for use during gc as we
> cannot guarantee that the emeta data is readable if a write
> error occurred.
>
> Signed-off-by: Hans Holmberg <hans.holmberg@...xlabs.com>
> ---
> drivers/lightnvm/pblk-core.c | 43 +++++++++++++++++++++--
> drivers/lightnvm/pblk-gc.c | 79 +++++++++++++++++++++++++++----------------
> drivers/lightnvm/pblk-init.c | 39 ++++++++++++++-------
> drivers/lightnvm/pblk-rl.c | 29 +++++++++++++---
> drivers/lightnvm/pblk-sysfs.c | 15 ++++++--
> drivers/lightnvm/pblk-write.c | 2 ++
> drivers/lightnvm/pblk.h | 25 +++++++++++---
> 7 files changed, 178 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 7762e89..f6135e4 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -373,7 +373,13 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
>
> lockdep_assert_held(&line->lock);
>
> - if (!vsc) {
> + if (line->w_err_gc->has_write_err) {
> + if (line->gc_group != PBLK_LINEGC_WERR) {
> + line->gc_group = PBLK_LINEGC_WERR;
> + move_list = &l_mg->gc_werr_list;
> + pblk_rl_werr_line_in(&pblk->rl);
> + }
> + } else if (!vsc) {
> if (line->gc_group != PBLK_LINEGC_FULL) {
> line->gc_group = PBLK_LINEGC_FULL;
> move_list = &l_mg->gc_full_list;
> @@ -1603,8 +1609,13 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
> line->state = PBLK_LINESTATE_FREE;
> line->gc_group = PBLK_LINEGC_NONE;
> pblk_line_free(line);
> - spin_unlock(&line->lock);
>
> + if (line->w_err_gc->has_write_err) {
> + pblk_rl_werr_line_out(&pblk->rl);
> + line->w_err_gc->has_write_err = 0;
> + }
> +
> + spin_unlock(&line->lock);
> atomic_dec(&gc->pipeline_gc);
>
> spin_lock(&l_mg->free_lock);
> @@ -1767,11 +1778,32 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
>
> spin_lock(&l_mg->close_lock);
> spin_lock(&line->lock);
> +
> + /* Update the in-memory start address for emeta, in case it has
> + * shifted due to write errors
> + */
> + if (line->emeta_ssec != line->cur_sec)
> + line->emeta_ssec = line->cur_sec;
> +
> list_add_tail(&line->list, &l_mg->emeta_list);
> spin_unlock(&line->lock);
> spin_unlock(&l_mg->close_lock);
>
> pblk_line_should_sync_meta(pblk);
> +
> +
> +}
> +
> +static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
> +{
> + struct pblk_line_meta *lm = &pblk->lm;
> + unsigned int lba_list_size = lm->emeta_len[2];
> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
> + struct pblk_emeta *emeta = line->emeta;
> +
> + w_err_gc->lba_list = kmalloc(lba_list_size, GFP_KERNEL);
> + memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
> + lba_list_size);
> }
>
> void pblk_line_close_ws(struct work_struct *work)
> @@ -1780,6 +1812,13 @@ void pblk_line_close_ws(struct work_struct *work)
> ws);
> struct pblk *pblk = line_ws->pblk;
> struct pblk_line *line = line_ws->line;
> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
> +
> + /* Write errors makes the emeta start address stored in smeta invalid,
> + * so keep a copy of the lba list until we've gc'd the line
> + */
> + if (w_err_gc->has_write_err)
> + pblk_save_lba_list(pblk, line);
>
> pblk_line_close(pblk, line);
> mempool_free(line_ws, pblk->gen_ws_pool);
> diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
> index b0cc277..62f0548 100644
> --- a/drivers/lightnvm/pblk-gc.c
> +++ b/drivers/lightnvm/pblk-gc.c
> @@ -138,10 +138,10 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
> struct pblk_line_mgmt *l_mg = &pblk->l_mg;
> struct pblk_line_meta *lm = &pblk->lm;
> struct pblk_gc *gc = &pblk->gc;
> - struct line_emeta *emeta_buf;
> + struct line_emeta *emeta_buf = NULL;
> struct pblk_line_ws *gc_rq_ws;
> struct pblk_gc_rq *gc_rq;
> - __le64 *lba_list;
> + __le64 *lba_list = NULL;
> unsigned long *invalid_bitmap;
> int sec_left, nr_secs, bit;
> int ret;
> @@ -150,34 +150,42 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
> if (!invalid_bitmap)
> goto fail_free_ws;
>
> - emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
> - GFP_KERNEL);
> - if (!emeta_buf) {
> - pr_err("pblk: cannot use GC emeta\n");
> - goto fail_free_bitmap;
> - }
> -
> - ret = pblk_line_read_emeta(pblk, line, emeta_buf);
> - if (ret) {
> - pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
> - goto fail_free_emeta;
> - }
> + if (line->w_err_gc->has_write_err) {
> + lba_list = line->w_err_gc->lba_list;
> + } else {
> + emeta_buf = pblk_malloc(lm->emeta_len[0],
> + l_mg->emeta_alloc_type, GFP_KERNEL);
> + if (!emeta_buf) {
> + pr_err("pblk: cannot use GC emeta\n");
> + goto fail_free_bitmap;
> + }
>
> - /* If this read fails, it means that emeta is corrupted. For now, leave
> - * the line untouched. TODO: Implement a recovery routine that scans and
> - * moves all sectors on the line.
> - */
> + ret = pblk_line_read_emeta(pblk, line, emeta_buf);
> + if (ret) {
> + pr_err("pblk: line %d read emeta failed (%d)\n",
> + line->id, ret);
> + goto fail_free_emeta;
> + }
>
> - ret = pblk_recov_check_emeta(pblk, emeta_buf);
> - if (ret) {
> - pr_err("pblk: inconsistent emeta (line %d)\n", line->id);
> - goto fail_free_emeta;
> - }
> + /* If this read fails, it means that emeta is corrupted.
> + * For now, leave the line untouched.
> + * TODO: Implement a recovery routine that scans and moves
> + * all sectors on the line.
> + */
> +
> + ret = pblk_recov_check_emeta(pblk, emeta_buf);
> + if (ret) {
> + pr_err("pblk: inconsistent emeta (line %d)\n",
> + line->id);
> + goto fail_free_emeta;
> + }
>
> - lba_list = emeta_to_lbas(pblk, emeta_buf);
> - if (!lba_list) {
> - pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
> - goto fail_free_emeta;
> + lba_list = emeta_to_lbas(pblk, emeta_buf);
> + if (!lba_list) {
> + pr_err("pblk: could not interpret emeta (line %d)\n",
> + line->id);
> + goto fail_free_emeta;
> + }
> }
Would it be an idea to move all the logic above to a different
function returning lba_list? This way, we do not have an extra indent
for a single line use case.
>
> spin_lock(&line->lock);
> @@ -240,7 +248,12 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
> goto next_rq;
>
> out:
> - pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
> + if (line->w_err_gc->has_write_err) {
> + kfree(lba_list);
> + line->w_err_gc->lba_list = NULL;
> + } else
> + pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
> +
> kfree(line_ws);
> kfree(invalid_bitmap);
>
> @@ -252,7 +265,11 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
> fail_free_gc_rq:
> kfree(gc_rq);
> fail_free_emeta:
> - pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
> + if (line->w_err_gc->has_write_err) {
> + kfree(lba_list);
> + line->w_err_gc->lba_list = NULL;
> + } else
> + pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
Can you use braces on the else branch here too?
> fail_free_bitmap:
> kfree(invalid_bitmap);
> fail_free_ws:
> @@ -349,12 +366,14 @@ static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
> static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
> {
> unsigned int nr_blocks_free, nr_blocks_need;
> + unsigned int werr_lines = atomic_read(&rl->werr_lines);
>
> nr_blocks_need = pblk_rl_high_thrs(rl);
> nr_blocks_free = pblk_rl_nr_free_blks(rl);
>
> /* This is not critical, no need to take lock here */
> - return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
> + return ((werr_lines > 0) ||
> + ((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
> }
>
> void pblk_gc_free_full_lines(struct pblk *pblk)
> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
> index 6f06727..092e361 100644
> --- a/drivers/lightnvm/pblk-init.c
> +++ b/drivers/lightnvm/pblk-init.c
> @@ -495,9 +495,14 @@ static void pblk_line_mg_free(struct pblk *pblk)
>
> static void pblk_line_meta_free(struct pblk_line *line)
> {
> + struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
> +
> kfree(line->blk_bitmap);
> kfree(line->erase_bitmap);
> kfree(line->chks);
> +
> + kfree(w_err_gc->lba_list);
> + kfree(w_err_gc);
> }
>
> static void pblk_lines_free(struct pblk *pblk)
> @@ -813,20 +818,28 @@ static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line)
> return -ENOMEM;
>
> line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
> - if (!line->erase_bitmap) {
> - kfree(line->blk_bitmap);
> - return -ENOMEM;
> - }
> + if (!line->erase_bitmap)
> + goto free_blk_bitmap;
> +
>
> line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta),
> GFP_KERNEL);
> - if (!line->chks) {
> - kfree(line->erase_bitmap);
> - kfree(line->blk_bitmap);
> - return -ENOMEM;
> - }
> + if (!line->chks)
> + goto free_erase_bitmap;
> +
> + line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL);
> + if (!line->w_err_gc)
> + goto free_chks;
>
> return 0;
> +
> +free_chks:
> + kfree(line->chks);
> +free_erase_bitmap:
> + kfree(line->erase_bitmap);
> +free_blk_bitmap:
> + kfree(line->blk_bitmap);
> + return -ENOMEM;
> }
>
> static int pblk_line_mg_init(struct pblk *pblk)
> @@ -851,12 +864,14 @@ static int pblk_line_mg_init(struct pblk *pblk)
> INIT_LIST_HEAD(&l_mg->gc_mid_list);
> INIT_LIST_HEAD(&l_mg->gc_low_list);
> INIT_LIST_HEAD(&l_mg->gc_empty_list);
> + INIT_LIST_HEAD(&l_mg->gc_werr_list);
>
> INIT_LIST_HEAD(&l_mg->emeta_list);
>
> - l_mg->gc_lists[0] = &l_mg->gc_high_list;
> - l_mg->gc_lists[1] = &l_mg->gc_mid_list;
> - l_mg->gc_lists[2] = &l_mg->gc_low_list;
> + l_mg->gc_lists[0] = &l_mg->gc_werr_list;
> + l_mg->gc_lists[1] = &l_mg->gc_high_list;
> + l_mg->gc_lists[2] = &l_mg->gc_mid_list;
> + l_mg->gc_lists[3] = &l_mg->gc_low_list;
>
> spin_lock_init(&l_mg->free_lock);
> spin_lock_init(&l_mg->close_lock);
> diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
> index 883a711..6a0616a 100644
> --- a/drivers/lightnvm/pblk-rl.c
> +++ b/drivers/lightnvm/pblk-rl.c
> @@ -73,6 +73,16 @@ void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
> pblk_rl_kick_u_timer(rl);
> }
>
> +void pblk_rl_werr_line_in(struct pblk_rl *rl)
> +{
> + atomic_inc(&rl->werr_lines);
> +}
> +
> +void pblk_rl_werr_line_out(struct pblk_rl *rl)
> +{
> + atomic_dec(&rl->werr_lines);
> +}
> +
> void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
> {
> atomic_add(nr_entries, &rl->rb_gc_cnt);
> @@ -99,11 +109,21 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl,
> {
> struct pblk *pblk = container_of(rl, struct pblk, rl);
> int max = rl->rb_budget;
> + int werr_gc_needed = atomic_read(&rl->werr_lines);
>
> if (free_blocks >= rl->high) {
> - rl->rb_user_max = max;
> - rl->rb_gc_max = 0;
> - rl->rb_state = PBLK_RL_HIGH;
> + if (werr_gc_needed) {
> + /* Allocate a small budget for recovering
> + * lines with write errors
> + */
> + rl->rb_gc_max = 1 << rl->rb_windows_pw;
> + rl->rb_user_max = max - rl->rb_gc_max;
> + rl->rb_state = PBLK_RL_WERR;
> + } else {
> + rl->rb_user_max = max;
> + rl->rb_gc_max = 0;
> + rl->rb_state = PBLK_RL_OFF;
> + }
> } else if (free_blocks < rl->high) {
> int shift = rl->high_pw - rl->rb_windows_pw;
> int user_windows = free_blocks >> shift;
> @@ -124,7 +144,7 @@ static void __pblk_rl_update_rates(struct pblk_rl *rl,
> rl->rb_state = PBLK_RL_LOW;
> }
>
> - if (rl->rb_state == (PBLK_RL_MID | PBLK_RL_LOW))
> + if (rl->rb_state != PBLK_RL_OFF)
> pblk_gc_should_start(pblk);
> else
> pblk_gc_should_stop(pblk);
> @@ -221,6 +241,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
> atomic_set(&rl->rb_user_cnt, 0);
> atomic_set(&rl->rb_gc_cnt, 0);
> atomic_set(&rl->rb_space, -1);
> + atomic_set(&rl->werr_lines, 0);
>
> timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);
>
> diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
> index e61909a..88a0a7c 100644
> --- a/drivers/lightnvm/pblk-sysfs.c
> +++ b/drivers/lightnvm/pblk-sysfs.c
> @@ -173,6 +173,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
> int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
> int d_line_cnt = 0, l_line_cnt = 0;
> int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
> + int gc_werr = 0;
> +
> int bad = 0, cor = 0;
> int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
> int map_weight = 0, meta_weight = 0;
> @@ -237,6 +239,15 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
> gc_empty++;
> }
>
> + list_for_each_entry(line, &l_mg->gc_werr_list, list) {
> + if (line->type == PBLK_LINETYPE_DATA)
> + d_line_cnt++;
> + else if (line->type == PBLK_LINETYPE_LOG)
> + l_line_cnt++;
> + closed_line_cnt++;
> + gc_werr++;
> + }
> +
> list_for_each_entry(line, &l_mg->bad_list, list)
> bad++;
> list_for_each_entry(line, &l_mg->corrupt_list, list)
> @@ -275,8 +286,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
> l_mg->nr_lines);
>
> sz += snprintf(page + sz, PAGE_SIZE - sz,
> - "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n",
> - gc_full, gc_high, gc_mid, gc_low, gc_empty,
> + "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n",
> + gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
> atomic_read(&pblk->gc.read_inflight_gc));
>
> sz += snprintf(page + sz, PAGE_SIZE - sz,
> diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
> index ab45157..3b6bead 100644
> --- a/drivers/lightnvm/pblk-write.c
> +++ b/drivers/lightnvm/pblk-write.c
> @@ -136,6 +136,7 @@ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa)
> }
> }
>
> + line->w_err_gc->has_write_err = 1;
> spin_unlock(&line->lock);
> }
>
> @@ -277,6 +278,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
> if (rqd->error) {
> pblk_log_write_err(pblk, rqd);
> pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
> + line->w_err_gc->has_write_err = 1;
> }
>
> sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> index cff6aea..a4e55d8 100644
> --- a/drivers/lightnvm/pblk.h
> +++ b/drivers/lightnvm/pblk.h
> @@ -89,12 +89,14 @@ struct pblk_sec_meta {
> /* The number of GC lists and the rate-limiter states go together. This way the
> * rate-limiter can dictate how much GC is needed based on resource utilization.
> */
> -#define PBLK_GC_NR_LISTS 3
> +#define PBLK_GC_NR_LISTS 4
>
> enum {
> - PBLK_RL_HIGH = 1,
> - PBLK_RL_MID = 2,
> - PBLK_RL_LOW = 3,
> + PBLK_RL_OFF = 0,
> + PBLK_RL_WERR = 1,
> + PBLK_RL_HIGH = 2,
> + PBLK_RL_MID = 3,
> + PBLK_RL_LOW = 4
> };
>
> #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
> @@ -278,6 +280,8 @@ struct pblk_rl {
> int rb_user_active;
> int rb_gc_active;
>
> + atomic_t werr_lines; /* Number of write error lines that needs gc */
> +
> struct timer_list u_timer;
>
> unsigned long long nr_secs;
> @@ -311,6 +315,7 @@ enum {
> PBLK_LINEGC_MID = 23,
> PBLK_LINEGC_HIGH = 24,
> PBLK_LINEGC_FULL = 25,
> + PBLK_LINEGC_WERR = 26
> };
>
> #define PBLK_MAGIC 0x70626c6b /*pblk*/
> @@ -412,6 +417,11 @@ struct pblk_smeta {
> struct line_smeta *buf; /* smeta buffer in persistent format */
> };
>
> +struct pblk_w_err_gc {
> + int has_write_err;
> + __le64 *lba_list;
> +};
> +
> struct pblk_line {
> struct pblk *pblk;
> unsigned int id; /* Line number corresponds to the
> @@ -457,6 +467,8 @@ struct pblk_line {
>
> struct kref ref; /* Write buffer L2P references */
>
> + struct pblk_w_err_gc *w_err_gc; /* Write error gc recovery metadata */
> +
> spinlock_t lock; /* Necessary for invalid_bitmap only */
> };
>
> @@ -488,6 +500,8 @@ struct pblk_line_mgmt {
> struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
> struct list_head gc_low_list; /* Full lines ready to GC, low isc */
>
> + struct list_head gc_werr_list; /* Write err recovery list */
> +
> struct list_head gc_full_list; /* Full lines ready to GC, no valid */
> struct list_head gc_empty_list; /* Full lines close, all valid */
>
> @@ -894,6 +908,9 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
> bool used);
> int pblk_rl_is_limit(struct pblk_rl *rl);
>
> +void pblk_rl_werr_line_in(struct pblk_rl *rl);
> +void pblk_rl_werr_line_out(struct pblk_rl *rl);
> +
> /*
> * pblk sysfs
> */
> --
> 2.7.4
Otherwise, it looks good to me
Javier
Download attachment "signature.asc" of type "application/pgp-signature" (834 bytes)
Powered by blists - more mailing lists