Message-Id: <20070217165625.5845.54721.sendpatchset@localhost.localdomain>
Date: Sat, 17 Feb 2007 18:56:25 +0200
From: Artem Bityutskiy <dedekind@...radead.org>
To: Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Cc: Christoph Hellwig <hch@...radead.org>,
Artem Bityutskiy <dedekind@...radead.org>,
Frank Haverkamp <haver@...t.ibm.com>,
Thomas Gleixner <tglx@...utronix.de>,
David Woodhouse <dwmw2@...radead.org>,
Josh Boyer <jwboyer@...ux.vnet.ibm.com>
Subject: [PATCH 24/44 take 2] [UBI] wear-leveling unit implementation
diff -auNrp tmp-from/drivers/mtd/ubi/wl.c tmp-to/drivers/mtd/ubi/wl.c
--- tmp-from/drivers/mtd/ubi/wl.c 1970-01-01 02:00:00.000000000 +0200
+++ tmp-to/drivers/mtd/ubi/wl.c 2007-02-17 18:07:27.000000000 +0200
@@ -0,0 +1,1684 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors: Artem B. Bityutskiy, Thomas Gleixner
+ */
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/wait.h>
+#include <linux/crc32.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <mtd/ubi-header.h>
+#include "ubi.h"
+#include "alloc.h"
+#include "wl.h"
+#include "badeb.h"
+#include "io.h"
+#include "account.h"
+#include "eba.h"
+#include "background.h"
+#include "scan.h"
+#include "misc.h"
+#include "debug.h"
+
+/* Number of physical eraseblocks reserved for wear-leveling purposes */
+#define WL_RESERVED_PEBS 1
+
+/*
+ * The number of erase cycles for which short term, unknown, and long term
+ * physical eraseblocks are protected.
+ */
+#define ST_PROTECTION 16
+#define U_PROTECTION 10
+#define LT_PROTECTION 4
+
+/*
+ * Maximum difference between two erase counters. If this threshold is
+ * exceeded, the WL unit starts moving data from used physical eraseblocks with
+ * low erase counter to free physical eraseblocks with high erase counter.
+ */
+#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
+
+/*
+ * When a physical eraseblock is moved, the WL unit has to pick the target
+ * physical eraseblock to move to. The simplest way would be just to pick the
+ * one with the highest erase counter. But in certain workloads this could
+ * lead to unbounded wearing of one or a few physical eraseblocks. Indeed,
+ * imagine a situation when the picked physical eraseblock is constantly
+ * erased after the data is written to it. So we have a constant which limits
+ * the highest erase counter of the free physical eraseblock to pick. Namely,
+ * the WL unit does not pick eraseblocks with an erase counter greater than
+ * the lowest erase counter plus %WL_FREE_MAX_DIFF.
+ */
+#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID_WL
+static int paranoid_check_ec(const struct ubi_info *ubi, int pnum, int ec);
+static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root);
+#else
+#define paranoid_check_ec(ubi, pnum, ec) 0
+#define paranoid_check_in_wl_tree(e, root)
+#endif
+
+/**
+ * tree_empty - a helper function to check if an RB-tree is empty.
+ *
+ * @root: the root of the tree
+ *
+ * This function returns non-zero if the tree is empty and zero if not.
+ */
+static inline int tree_empty(struct rb_root *root)
+{
+ return root->rb_node == NULL;
+}
+
+static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root);
+
+/* Functions to add and delete wear-leveling entries from different trees */
+
+static inline void free_tree_add(struct ubi_wl_info *wl,
+ struct ubi_wl_entry *e)
+{
+ wl_tree_add(e, &wl->free);
+}
+static inline void used_tree_add(struct ubi_wl_info *wl,
+ struct ubi_wl_entry *e)
+{
+ wl_tree_add(e, &wl->used);
+}
+static inline void scrub_tree_add(struct ubi_wl_info *wl,
+ struct ubi_wl_entry *e)
+{
+ wl_tree_add(e, &wl->scrub);
+}
+
+static inline void free_tree_del(struct ubi_wl_info *wl,
+ struct ubi_wl_entry *e)
+{
+ paranoid_check_in_wl_tree(e, &wl->free);
+ rb_erase(&e->rb, &wl->free);
+}
+static inline void used_tree_del(struct ubi_wl_info *wl,
+ struct ubi_wl_entry *e)
+{
+ paranoid_check_in_wl_tree(e, &wl->used);
+ rb_erase(&e->rb, &wl->used);
+}
+static inline void scrub_tree_del(struct ubi_wl_info *wl,
+ struct ubi_wl_entry *e)
+{
+ paranoid_check_in_wl_tree(e, &wl->scrub);
+ rb_erase(&e->rb, &wl->scrub);
+}
+
+static int produce_free(const struct ubi_info *ubi);
+static struct ubi_wl_entry *pick_long_term(struct ubi_wl_info *wl);
+static struct ubi_wl_entry *pick_unknown(struct ubi_wl_info *wl);
+static struct ubi_wl_entry *pick_short_term(struct ubi_wl_info *wl);
+static void prot_tree_add(struct ubi_wl_info *wl, struct ubi_wl_entry *e,
+ struct ubi_wl_prot_entry *pe, int abs_ec);
+
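+/**
+ * ubi_wl_get_peb - get a free physical eraseblock.
+ *
+ * @ubi: the UBI device description object
+ * @dtype: the expected type of the data which will be stored in this
+ * physical eraseblock
+ *
+ * This function picks a free physical eraseblock depending on the data type
+ * hint, moves it to the protection trees, and returns its number. Returns a
+ * negative error code in case of failure.
+ */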
+int ubi_wl_get_peb(const struct ubi_info *ubi, enum ubi_data_type dtype)
+{
+ int err, protect;
+ struct ubi_wl_entry *e;
+ struct ubi_wl_info *wl = ubi->wl;
+ struct ubi_wl_prot_entry *pe;
+
+ might_sleep();
+
+ /* Input arguments sanity check */
+ ubi_assert(dtype == UBI_DATA_LONGTERM || dtype == UBI_DATA_SHORTTERM ||
+ dtype == UBI_DATA_UNKNOWN);
+
+ pe = ubi_alloc_wl_prot_entry();
+ if (unlikely(!pe))
+ return -ENOMEM;
+
+retry:
+ spin_lock(&wl->lock);
+ if (unlikely(tree_empty(&wl->free))) {
+ if (unlikely(wl->erase_pending == 0)) {
+ ubi_err("no free eraseblocks");
+ spin_unlock(&wl->lock);
+ ubi_free_wl_prot_entry(pe);
+ return -ENOSPC;
+ }
+ spin_unlock(&wl->lock);
+
+ err = produce_free(ubi);
+ if (unlikely(err < 0)) {
+ ubi_free_wl_prot_entry(pe);
+ return err;
+ }
+ goto retry;
+ }
+
+ switch (dtype) {
+ case UBI_DATA_LONGTERM:
+ e = pick_long_term(wl);
+ protect = LT_PROTECTION;
+ break;
+ case UBI_DATA_UNKNOWN:
+ e = pick_unknown(wl);
+ protect = U_PROTECTION;
+ break;
+ case UBI_DATA_SHORTTERM:
+ e = pick_short_term(wl);
+ protect = ST_PROTECTION;
+ break;
+ default:
+ protect = 0;
+ e = NULL;
+ BUG();
+ }
+
+ /*
+ * Move the physical eraseblock to the protection trees where it will
+ * be protected from being moved for some time.
+ */
+ free_tree_del(wl, e);
+ prot_tree_add(wl, e, pe, protect);
+
+ dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect);
+ spin_unlock(&wl->lock);
+
+ return e->pnum;
+}
+
+static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root);
+static int schedule_erase(const struct ubi_info *ubi, struct ubi_wl_entry *e,
+ int torture);
+static void check_protection_over(struct ubi_wl_info *wl);
+static void prot_tree_del(struct ubi_wl_info *wl, int pnum);
+
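+/**
+ * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit.
+ *
+ * @ubi: the UBI device description object
+ * @pnum: the physical eraseblock number to return
+ * @torture: if this physical eraseblock has to be tortured
+ *
+ * This function is called when a physical eraseblock is no longer used and
+ * has to be erased. It schedules the physical eraseblock for erasure.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */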
+int ubi_wl_put_peb(const struct ubi_info *ubi, int pnum, int torture)
+{
+ int err;
+ struct ubi_wl_entry *e;
+ struct ubi_wl_info *wl = ubi->wl;
+
+ dbg_wl("PEB %d", pnum);
+ might_sleep();
+
+ /* Input arguments sanity check */
+ ubi_assert(pnum >= 0);
+ ubi_assert(pnum < ubi->io->peb_count);
+
+ spin_lock(&wl->lock);
+ ubi_assert(wl->erase_pending >= 0);
+ wl->erase_pending += 1;
+
+ e = wl->lookuptbl[pnum];
+ if (unlikely(e == wl->move)) {
+ /*
+ * User is putting a physical eraseblock which was selected to
+ * be moved. We cancel the movement by setting @wl->move to
+ * %NULL. The wear-leveling worker has to notice this and
+ * cancel.
+ *
+ * Note, the physical eraseblock was removed from the @wl->used
+ * tree by the wear-leveling worker and is not in any tree now.
+ */
+ dbg_wl("cancel PEB %d movement", pnum);
+ wl->move = NULL;
+ } else {
+ if (in_wl_tree(e, &wl->used))
+ used_tree_del(wl, e);
+ else if (unlikely(in_wl_tree(e, &wl->scrub)))
+ scrub_tree_del(wl, e);
+ else
+ prot_tree_del(wl, e->pnum);
+ }
+ spin_unlock(&wl->lock);
+
+ err = schedule_erase(ubi, e, torture);
+ if (unlikely(err)) {
+ spin_lock(&wl->lock);
+ wl->erase_pending -= 1;
+ used_tree_add(wl, e);
+ spin_unlock(&wl->lock);
+ }
+
+ return err;
+}
+
+static int ensure_wear_leveling(const struct ubi_info *ubi);
+
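+/**
+ * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing.
+ *
+ * @ubi: the UBI device description object
+ * @pnum: the physical eraseblock number to schedule
+ *
+ * This function puts the physical eraseblock to the @wl->scrub tree so that
+ * its contents will be copied to another physical eraseblock by the
+ * wear-leveling worker. Returns zero in case of success and a negative error
+ * code in case of failure.
+ */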
+int ubi_wl_scrub_peb(const struct ubi_info *ubi, int pnum)
+{
+ struct ubi_wl_entry *e;
+ struct ubi_wl_info *wl = ubi->wl;
+
+ dbg_wl("schedule PEB %d for scrubbing", pnum);
+
+ spin_lock(&wl->lock);
+ e = wl->lookuptbl[pnum];
+ if (e == wl->move || in_wl_tree(e, &wl->scrub)) {
+ spin_unlock(&wl->lock);
+ return 0;
+ }
+
+ if (in_wl_tree(e, &wl->used))
+ used_tree_del(wl, e);
+ else
+ prot_tree_del(wl, pnum);
+
+ scrub_tree_add(wl, e);
+ spin_unlock(&wl->lock);
+
+ /*
+ * Technically, scrubbing is the same as wear-leveling, so it is done
+ * by the WL worker. Schedule it.
+ */
+ return ensure_wear_leveling(ubi);
+}
+
+static int erase_worker(const struct ubi_info *ubi, struct ubi_bgt_work *wrk,
+ int cancel);
+
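+/**
+ * ubi_wl_flush - flush pending works.
+ *
+ * @ubi: the UBI device description object
+ *
+ * This function synchronously executes pending works, but no more of them
+ * than were pending when it was called. Returns zero in case of success and
+ * a negative error code in case of failure.
+ */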
+int ubi_wl_flush(const struct ubi_info *ubi)
+{
+ int err, pending_count;
+
+ pending_count = ubi->bgt->pending_works_count;
+
+ dbg_wl("flush (%d pending works)", pending_count);
+
+ /*
+ * Erase while the pending works queue is not empty, but not more than
+ * the number of currently pending works.
+ */
+ while (pending_count-- > 0) {
+ err = ubi_bgt_do_work(ubi);
+ if (unlikely(err))
+ return err;
+ }
+
+ return 0;
+}
+
+static void tree_destroy(struct rb_root *root);
+
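+/**
+ * ubi_wl_init_scan - initialize the wear-leveling unit using scanning
+ * information.
+ *
+ * @ubi: the UBI device description object
+ * @si: the scanning information
+ *
+ * This function builds the @wl->free, @wl->used and @wl->scrub trees from the
+ * scanning information and schedules the physical eraseblocks which have to
+ * be erased. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */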
+int ubi_wl_init_scan(struct ubi_info *ubi, struct ubi_scan_info *si)
+{
+ int err;
+ struct rb_node *rb1, *rb2;
+ struct ubi_scan_volume *sv;
+ struct ubi_scan_leb *seb, *tmp;
+ struct ubi_wl_entry *e;
+ struct ubi_wl_info *wl;
+ const struct ubi_io_info *io = ubi->io;
+
+ dbg_wl("initialize the UBI wear-leveling unit");
+
+ wl = ubi_kzalloc(sizeof(struct ubi_wl_info));
+ if (!wl)
+ return -ENOMEM;
+ ubi->wl = wl;
+
+ wl->used = wl->free = wl->scrub = RB_ROOT;
+ wl->prot.pnum = wl->prot.aec = RB_ROOT;
+ spin_lock_init(&wl->lock);
+ wl->max_ec = si->max_ec;
+
+ err = -ENOMEM;
+ wl->lookuptbl = ubi_kzalloc(io->peb_count * sizeof(void *));
+ if (!wl->lookuptbl)
+ goto out_free;
+
+ /*
+ * The way we distinguish between an older LEB and a newer LEB is based
+ * on the following principles:
+ * 1 if we have LEBs with versions A and B, A < B, then B is newer than
+ *   A when abs(B - A) < %0x7FFFFFFF;
+ * 2 as the WL unit guarantees that the length of the pending works
+ *   queue is shorter than %0x7FFFFFFF works, and the works are put at
+ *   the tail of the queue and taken from its head, the above algorithm
+ *   works correctly.
+ *
+ * Now we've got a list of eraseblocks to erase, and they are
+ * out-of-order, which does not satisfy the 2nd item, so we've got to
+ * erase them now instead of deferring this.
+ */
+ list_for_each_entry_safe(seb, tmp, &si->erase, u.list) {
+ cond_resched();
+
+ dbg_wl("erase PEB %d", seb->pnum);
+ err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec + 1);
+ if (unlikely(err)) {
+ if (err != -EIO && err != -EROFS)
+ goto out_free;
+ list_del(&seb->u.list);
+ list_add_tail(&seb->u.list, &si->corr);
+ } else {
+ seb->ec += 1;
+ list_del(&seb->u.list);
+ list_add_tail(&seb->u.list, &si->free);
+ }
+ }
+
+ list_for_each_entry(seb, &si->free, u.list) {
+ cond_resched();
+
+ e = ubi_alloc_wl_entry();
+ if (unlikely(!e)) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ e->pnum = seb->pnum;
+ e->ec = seb->ec;
+ ubi_assert(e->ec >= 0);
+ free_tree_add(wl, e);
+ wl->lookuptbl[e->pnum] = e;
+ }
+
+ list_for_each_entry(seb, &si->corr, u.list) {
+ cond_resched();
+
+ e = ubi_alloc_wl_entry();
+ if (unlikely(!e)) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ e->pnum = seb->pnum;
+ e->ec = seb->ec;
+ wl->lookuptbl[e->pnum] = e;
+ wl->erase_pending += 1;
+ err = schedule_erase(ubi, e, 0);
+ if (unlikely(err)) {
+ ubi_free_wl_entry(e);
+ goto out_free;
+ }
+ }
+
+ rb_for_each_entry(rb1, sv, &si->volumes, rb) {
+ rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
+ cond_resched();
+
+ e = ubi_alloc_wl_entry();
+ if (unlikely(!e)) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ e->pnum = seb->pnum;
+ e->ec = seb->ec;
+ wl->lookuptbl[e->pnum] = e;
+ if (!seb->scrub) {
+ dbg_wl("add PEB %d EC %d to the used tree",
+ e->pnum, e->ec);
+ used_tree_add(wl, e);
+ } else {
+ dbg_wl("add PEB %d EC %d to the scrub tree",
+ e->pnum, e->ec);
+ scrub_tree_add(wl, e);
+ }
+ }
+ }
+
+ err = ubi_acc_reserve(ubi, WL_RESERVED_PEBS);
+ if (err)
+ goto out_free;
+
+ /* Schedule wear-leveling if needed */
+ err = ensure_wear_leveling(ubi);
+ if (err)
+ goto out_free;
+
+ return 0;
+
+out_free:
+ tree_destroy(&wl->used);
+ tree_destroy(&wl->free);
+ tree_destroy(&wl->scrub);
+ ubi_kfree(wl->lookuptbl);
+ ubi_kfree(wl);
+ return err;
+}
+
+static void protection_trees_destroy(struct ubi_wl_info *wl);
+
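+/**
+ * ubi_wl_close - close the wear-leveling unit.
+ *
+ * @ubi: the UBI device description object
+ *
+ * This function frees all resources of the wear-leveling unit.
+ */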
+void ubi_wl_close(struct ubi_info *ubi)
+{
+ struct ubi_wl_info *wl = ubi->wl;
+
+ dbg_wl("close the UBI wear-leveling unit");
+
+ protection_trees_destroy(wl);
+ tree_destroy(&wl->used);
+ tree_destroy(&wl->free);
+ tree_destroy(&wl->scrub);
+ ubi_kfree(wl->lookuptbl);
+ ubi_kfree(wl);
+}
+
+/**
+ * find_wl_entry - find a wear-leveling entry closest to a certain erase counter.
+ *
+ * @root: the RB-tree where to look for
+ * @max: highest permitted difference from the lowest erase counter
+ *
+ * This function looks for a wear-leveling entry with an erase counter closest
+ * to, but less than, the lowest erase counter in @root plus @max.
+ */
+static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
+{
+ struct rb_node *p;
+ struct ubi_wl_entry *e;
+
+ e = rb_entry(rb_first(root), struct ubi_wl_entry, rb);
+ max += e->ec;
+
+ p = root->rb_node;
+ while (p) {
+ struct ubi_wl_entry *e1;
+
+ e1 = rb_entry(p, struct ubi_wl_entry, rb);
+ if (e1->ec >= max)
+ p = p->rb_left;
+ else {
+ p = p->rb_right;
+ e = e1;
+ }
+ }
+
+ return e;
+}
+
+/**
+ * pick_long_term - select a "long-term" physical eraseblock.
+ *
+ * @wl: the wear-leveling unit description data structure
+ *
+ * This function returns the requested physical eraseblock. The @wl->lock must
+ * be held and the @wl->free RB-tree must not be empty.
+ */
+static struct ubi_wl_entry *pick_long_term(struct ubi_wl_info *wl)
+{
+ struct ubi_wl_entry *e;
+
+ /*
+ * For long term data we pick a physical eraseblock with high erase
+ * counter. But the highest erase counter we can pick is bounded by
+ * the lowest erase counter plus %WL_FREE_MAX_DIFF.
+ */
+ e = find_wl_entry(&wl->free, WL_FREE_MAX_DIFF);
+ return e;
+}
+
+/**
+ * pick_unknown - select an "unknown" physical eraseblock.
+ *
+ * @wl: the wear-leveling unit description data structure
+ *
+ * This function returns the requested physical eraseblock. The @wl->lock must
+ * be held and the @wl->free RB-tree must not be empty.
+ */
+static struct ubi_wl_entry *pick_unknown(struct ubi_wl_info *wl)
+{
+ int medium_ec;
+ struct rb_node *p;
+ struct ubi_wl_entry *first, *last, *e;
+
+ /*
+ * For unknown data we try to pick a physical eraseblock with a medium
+ * erase counter. But by no means may we pick a physical eraseblock
+ * with an erase counter greater than or equal to the lowest erase
+ * counter plus %WL_FREE_MAX_DIFF.
+ */
+
+ first = rb_entry(rb_first(&wl->free), struct ubi_wl_entry, rb);
+ last = rb_entry(rb_last(&wl->free), struct ubi_wl_entry, rb);
+
+ if (last->ec - first->ec < WL_FREE_MAX_DIFF)
+ return rb_entry(wl->free.rb_node, struct ubi_wl_entry, rb);
+
+ medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
+ e = first;
+
+ p = wl->free.rb_node;
+ while (p) {
+ struct ubi_wl_entry *e1;
+
+ e1 = rb_entry(p, struct ubi_wl_entry, rb);
+ if (e1->ec >= medium_ec)
+ p = p->rb_left;
+ else {
+ p = p->rb_right;
+ e = e1;
+ }
+ }
+
+ return e;
+}
+
+/**
+ * pick_short_term - select a "short term" physical eraseblock.
+ *
+ * @wl: the wear-leveling unit description data structure
+ *
+ * This function returns the requested physical eraseblock. The @wl->lock must
+ * be held and the @wl->free RB-tree must not be empty.
+ */
+static struct ubi_wl_entry *pick_short_term(struct ubi_wl_info *wl)
+{
+ struct ubi_wl_entry *e;
+
+ /*
+ * For short term data we pick a physical eraseblock with the lowest
+ * erase counter as we expect it will be erased soon.
+ */
+ e = rb_entry(rb_first(&wl->free), struct ubi_wl_entry, rb);
+ return e;
+}
+
+/**
+ * prot_tree_add - add a physical eraseblock to the protection trees.
+ *
+ * @wl: the wear-leveling unit description data structure
+ * @e: the physical eraseblock to add
+ * @pe: a protection entry object to use
+ * @abs_ec: after how many erase operations this physical eraseblock has to be
+ * removed from the protection trees
+ *
+ * @wl->lock has to be locked.
+ */
+static void prot_tree_add(struct ubi_wl_info *wl, struct ubi_wl_entry *e,
+ struct ubi_wl_prot_entry *pe, int abs_ec)
+{
+ struct rb_node **p, *parent = NULL;
+ struct ubi_wl_prot_entry *pe1;
+
+ pe->e = e;
+ pe->abs_ec = wl->abs_ec + abs_ec;
+
+ p = &wl->prot.pnum.rb_node;
+ while (*p) {
+ parent = *p;
+ pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum);
+
+ if (e->pnum < pe1->e->pnum)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&pe->rb_pnum, parent, p);
+ rb_insert_color(&pe->rb_pnum, &wl->prot.pnum);
+
+ p = &wl->prot.aec.rb_node;
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+ pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec);
+
+ if (pe->abs_ec < pe1->abs_ec)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&pe->rb_aec, parent, p);
+ rb_insert_color(&pe->rb_aec, &wl->prot.aec);
+}
+
+/**
+ * check_protection_over - check if it is time to stop protecting some
+ * physical eraseblocks.
+ *
+ * @wl: the wear-leveling unit description data structure
+ *
+ * This function is called after each erase operation, when the absolute erase
+ * counter is incremented, to check whether some physical eraseblocks no longer
+ * have to be protected. These physical eraseblocks are moved from the
+ * protection trees to the used tree.
+ */
+static void check_protection_over(struct ubi_wl_info *wl)
+{
+ struct ubi_wl_prot_entry *pe;
+
+ /*
+ * There may be several protected physical eraseblocks to remove,
+ * process them all.
+ */
+ while (1) {
+ spin_lock(&wl->lock);
+ if (tree_empty(&wl->prot.aec)) {
+ spin_unlock(&wl->lock);
+ break;
+ }
+
+ pe = rb_entry(rb_first(&wl->prot.aec),
+ struct ubi_wl_prot_entry, rb_aec);
+
+ if (pe->abs_ec > wl->abs_ec) {
+ spin_unlock(&wl->lock);
+ break;
+ }
+
+ dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu",
+ pe->e->pnum, wl->abs_ec, pe->abs_ec);
+ rb_erase(&pe->rb_aec, &wl->prot.aec);
+ rb_erase(&pe->rb_pnum, &wl->prot.pnum);
+ used_tree_add(wl, pe->e);
+ spin_unlock(&wl->lock);
+
+ ubi_free_wl_prot_entry(pe);
+ cond_resched();
+ }
+}
+
+/**
+ * prot_tree_del - remove a physical eraseblock from the protection trees
+ *
+ * @wl: the wear-leveling unit description data structure
+ * @pnum: the physical eraseblock number to remove
+ */
+static void prot_tree_del(struct ubi_wl_info *wl, int pnum)
+{
+ struct rb_node *p;
+ struct ubi_wl_prot_entry *pe = NULL;
+
+ p = wl->prot.pnum.rb_node;
+ while (p) {
+
+ pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum);
+
+ if (pnum == pe->e->pnum)
+ break;
+
+ if (pnum < pe->e->pnum)
+ p = p->rb_left;
+ else
+ p = p->rb_right;
+ }
+
+ ubi_assert(pe->e->pnum == pnum);
+ rb_erase(&pe->rb_aec, &wl->prot.aec);
+ rb_erase(&pe->rb_pnum, &wl->prot.pnum);
+ ubi_free_wl_prot_entry(pe);
+}
+
+static int wear_leveling_worker(const struct ubi_info *ubi,
+ struct ubi_bgt_work *wrk, int cancel);
+
+/**
+ * ensure_wear_leveling - schedule wear-leveling if it is needed.
+ *
+ * @ubi: the UBI device description object
+ *
+ * This function checks if it is time to start wear-leveling and schedules it
+ * if yes. This function returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+static int ensure_wear_leveling(const struct ubi_info *ubi)
+{
+ int err = 0;
+ struct ubi_wl_entry *e1;
+ struct ubi_wl_entry *e2;
+ struct ubi_bgt_work *wrk;
+ struct ubi_wl_info *wl = ubi->wl;
+
+ spin_lock(&wl->lock);
+ if (wl->wl_scheduled)
+ /* Wear-leveling is already in the work queue */
+ goto out_unlock;
+
+ /*
+ * If the @wl->scrub tree is not empty, scrubbing is needed, and the
+ * WL worker has to be scheduled anyway.
+ */
+ if (tree_empty(&wl->scrub)) {
+ if (tree_empty(&wl->used) || tree_empty(&wl->free))
+ /* No physical eraseblocks - no deal */
+ goto out_unlock;
+
+ /*
+ * We schedule wear-leveling only if the difference between the
+ * lowest erase counter of used physical eraseblocks and a high
+ * erase counter of free physical eraseblocks is greater than
+ * %UBI_WL_THRESHOLD.
+ */
+ e1 = rb_entry(rb_first(&wl->used), struct ubi_wl_entry, rb);
+ e2 = find_wl_entry(&wl->free, WL_FREE_MAX_DIFF);
+
+ if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
+ goto out_unlock;
+ dbg_wl("schedule wear-leveling");
+ } else
+ dbg_wl("schedule scrubbing");
+
+ wl->wl_scheduled = 1;
+ spin_unlock(&wl->lock);
+
+ wrk = ubi_alloc_bgt_work();
+ if (unlikely(!wrk)) {
+ err = -ENOMEM;
+ goto out_cancel;
+ }
+
+ wrk->func = &wear_leveling_worker;
+ err = ubi_bgt_schedule(ubi, wrk);
+ if (unlikely(err)) {
+ /*
+ * The background thread was killed, so don't clear the
+ * @wl->wl_scheduled flag to prevent this error from happening
+ * again and again, and switch to read-only mode.
+ */
+ ubi_free_bgt_work(wrk);
+ ubi_eba_ro_mode(ubi);
+ }
+ return err;
+
+out_unlock:
+ spin_unlock(&wl->lock);
+ return err;
+
+out_cancel:
+ spin_lock(&wl->lock);
+ wl->wl_scheduled = 0;
+ spin_unlock(&wl->lock);
+ return err;
+}
+
+/**
+ * schedule_erase - schedule an erase work.
+ *
+ * @ubi: the UBI device description object
+ * @e: the WL entry of the physical eraseblock to erase
+ * @torture: if the physical eraseblock has to be tortured
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ *
+ * Note: @wl->erase_pending must be incremented before this function is called.
+ */
+static int schedule_erase(const struct ubi_info *ubi, struct ubi_wl_entry *e,
+ int torture)
+{
+ int err;
+ struct ubi_wl_erase_work *wl_wrk;
+
+ dbg_wl("schedule erasure of PEB %d, EC %d, torture %d",
+ e->pnum, e->ec, torture);
+
+ wl_wrk = ubi_alloc_wl_erase_work();
+ if (unlikely(!wl_wrk))
+ return -ENOMEM;
+
+ wl_wrk->wrk.func = &erase_worker;
+ wl_wrk->wrk.priv = wl_wrk;
+ wl_wrk->e = e;
+ wl_wrk->torture = torture;
+
+ err = ubi_bgt_schedule(ubi, &wl_wrk->wrk);
+ if (unlikely(err)) {
+ /*
+ * The background thread was killed, but we really need it. We
+ * can only work in read-only mode without it.
+ */
+ ubi_free_wl_erase_work(wl_wrk);
+ ubi_eba_ro_mode(ubi);
+ }
+ return err;
+}
+
+static int sync_erase(const struct ubi_info *ubi, struct ubi_wl_entry *e,
+ int torture);
+
+/**
+ * erase_worker - physical eraseblock erase worker function.
+ *
+ * @ubi: the UBI device description object
+ * @wrk: the work object
+ * @cancel: non-zero if the worker has to free memory and exit
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure. This function also takes care of marking the physical
+ * eraseblock bad if it cannot be erased.
+ */
+static int erase_worker(const struct ubi_info *ubi, struct ubi_bgt_work *wrk,
+ int cancel)
+{
+ int err;
+ struct ubi_wl_info *wl = ubi->wl;
+ struct ubi_wl_erase_work *wl_wrk = wrk->priv;
+ struct ubi_wl_entry *e = wl_wrk->e;
+ int pnum = e->pnum;
+
+ if (unlikely(cancel)) {
+ dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec);
+ ubi_free_wl_erase_work(wl_wrk);
+ ubi_free_wl_entry(e);
+ return 0;
+ }
+
+ dbg_wl("erase PEB %d EC %d", pnum, e->ec);
+
+ err = sync_erase(ubi, e, wl_wrk->torture);
+ if (likely(!err)) {
+ /* Fine, we've erased it successfully */
+ ubi_free_wl_erase_work(wl_wrk);
+
+ spin_lock(&wl->lock);
+ wl->erase_pending -= 1;
+ ubi_assert(wl->erase_pending >= 0);
+ wl->abs_ec += 1;
+ free_tree_add(wl, e);
+ spin_unlock(&wl->lock);
+
+ /*
+ * One more erase operation has happened, take care of the protected
+ * physical eraseblocks.
+ */
+ check_protection_over(wl);
+
+ /* And take care about wear-leveling */
+ err = ensure_wear_leveling(ubi);
+ return err;
+ }
+
+ /*
+ * Some error occurred during erasure. If this is something like
+ * %-EINTR, we just re-schedule this physical eraseblock for
+ * erasure.
+ */
+
+ if (err == -EINTR || err == -EAGAIN || err == -ENOMEM ||
+ err == -EBUSY) {
+ ubi_bgt_reschedule(ubi, wrk); /* Must not return error */
+ return err;
+ }
+
+ ubi_free_wl_erase_work(wl_wrk);
+ ubi_free_wl_entry(e);
+
+ spin_lock(&wl->lock);
+ wl->erase_pending -= 1;
+ spin_unlock(&wl->lock);
+
+ /*
+ * If this is not %-EIO, we have no idea what to do. Scheduling
+ * this physical eraseblock for erasure again will cause repeated
+ * errors.
+ */
+ if (err != -EIO) {
+ ubi_eba_ro_mode(ubi);
+ return err;
+ }
+
+ /* It is %-EIO, the PEB went bad */
+
+ if (!ubi->io->bad_allowed) {
+ ubi_err("flash device may be severely bad");
+ ubi_eba_ro_mode(ubi);
+ err = -EIO;
+ } else {
+ err = ubi_beb_mark_bad(ubi, pnum);
+ if (err)
+ ubi_eba_ro_mode(ubi);
+ }
+ return err;
+}
+
+/**
+ * wear_leveling_worker - wear-leveling worker function.
+ *
+ * @ubi: the UBI device description object
+ * @wrk: the work object
+ * @cancel: non-zero if the worker has to free memory and exit
+ *
+ * This function copies the contents of a less worn-out physical eraseblock to
+ * a more worn-out one. Returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+static int wear_leveling_worker(const struct ubi_info *ubi,
+ struct ubi_bgt_work *wrk, int cancel)
+{
+ int err, vol_id, lnum, scrub = 0, data_size, aldata_size;
+ struct ubi_wl_entry *e1, *e2;
+ struct ubi_vid_hdr *vid_hdr;
+ void *buf;
+ uint32_t crc;
+ struct ubi_wl_info *wl = ubi->wl;
+ struct ubi_wl_prot_entry *pe;
+ const struct ubi_io_info *io = ubi->io;
+
+ ubi_free_bgt_work(wrk);
+
+ if (unlikely(cancel))
+ return 0;
+
+ spin_lock(&wl->lock);
+ if (tree_empty(&wl->free) ||
+ (tree_empty(&wl->used) && tree_empty(&wl->scrub))) {
+ /*
+ * No free physical eraseblocks? Well, we cancel wear-leveling
+ * then. It will be triggered again when a free physical
+ * eraseblock appears.
+ *
+ * No used physical eraseblocks? They must be temporarily
+ * protected from being moved. They will be moved to the
+ * @wl->used tree later and the wear-leveling will be
+ * triggered again.
+ */
+ dbg_wl("cancel WL, a list is empty: free %d, used %d",
+ tree_empty(&wl->free), tree_empty(&wl->used));
+ goto out;
+ }
+
+ if (tree_empty(&wl->scrub)) {
+ /*
+ * Now pick the least worn-out used physical eraseblock and a
+ * highly worn-out free physical eraseblock. If the erase
+ * counters differ enough, start wear-leveling.
+ */
+ e1 = rb_entry(rb_first(&wl->used), struct ubi_wl_entry, rb);
+ e2 = find_wl_entry(&wl->free, WL_FREE_MAX_DIFF);
+
+ if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
+ dbg_wl("no WL needed: min used EC %d, max free EC %d",
+ e1->ec, e2->ec);
+ goto out;
+ }
+ used_tree_del(wl, e1);
+ dbg_wl("move PEB %d EC %d to PEB %d EC %d",
+ e1->pnum, e1->ec, e2->pnum, e2->ec);
+ } else {
+ scrub = 1;
+ e1 = rb_entry(rb_first(&wl->scrub), struct ubi_wl_entry, rb);
+ e2 = find_wl_entry(&wl->free, WL_FREE_MAX_DIFF);
+ scrub_tree_del(wl, e1);
+ dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
+ }
+
+ free_tree_del(wl, e2);
+ wl->move = e1;
+ spin_unlock(&wl->lock);
+
+ vid_hdr = ubi_zalloc_vid_hdr(ubi);
+ if (unlikely(!vid_hdr)) {
+ err = -ENOMEM;
+ goto out_err_cancel;
+ }
+
+ /*
+ * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum.
+ * We've selected a victim (@e1) and @wl->move refers to it. But the user
+ * may call 'ubi_wl_put_peb()' for @e1, and then the movement has to be
+ * canceled.
+ *
+ * We so far do not know which logical eraseblock our physical
+ * eraseblock (@e1) belongs to. This means we cannot lock it. We have
+ * to read the volume identifier header first. But if @e1 is put, it is
+ * scheduled for erasure, and we may have a race with the erasure.
+ * So, we may easily get an I/O error when we read the VID header.
+ * This does not necessarily mean something nasty.
+ */
+
+ err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
+ if (unlikely(err) && err != UBI_IO_BITFLIPS) {
+ /* OK, error. If the movement was canceled, don't panic */
+ spin_lock(&wl->lock);
+ if (!wl->move) {
+ spin_unlock(&wl->lock);
+ /* This physical eraseblock was put meanwhile */
+ goto out_cancel_wl;
+ }
+ spin_unlock(&wl->lock);
+ /* Well, this means there is a problem */
+ dbg_wl("VID hdr read error (%d)", err);
+ goto vid_hdr_read_error;
+ }
+
+ if (vid_hdr->vol_type == UBI_VID_STATIC) {
+ data_size = ubi32_to_cpu(vid_hdr->data_size);
+ aldata_size = align_up(data_size, io->min_io_size);
+ } else
+ data_size = aldata_size =
+ io->leb_size - ubi32_to_cpu(vid_hdr->data_pad);
+
+ ubi_assert(aldata_size % io->min_io_size == 0);
+
+ buf = ubi_kmalloc(aldata_size);
+ if (unlikely(!buf)) {
+ err = -ENOMEM;
+ goto out_err_cancel_vid_hdr;
+ }
+
+ vol_id = ubi32_to_cpu(vid_hdr->vol_id);
+ lnum = ubi32_to_cpu(vid_hdr->lnum);
+
+ /*
+ * We do not want anybody to write to this physical eraseblock while
+ * we are copying it, so we lock it.
+ */
+ err = ubi_eba_leb_write_lock(ubi, vol_id, lnum);
+ if (unlikely(err))
+ goto out_err_cancel_buf;
+
+ spin_lock(&wl->lock);
+ if (!wl->move)
+ /* This physical eraseblock was put meanwhile, cancel */
+ goto out_cancel_wl_unlock;
+ spin_unlock(&wl->lock);
+
+ /*
+ * From now on nobody can access this physical eraseblock as we locked
+ * the corresponding logical eraseblock.
+ */
+ dbg_wl("read %d bytes of data", aldata_size);
+ err = ubi_io_read_data(ubi, buf, e1->pnum, 0, aldata_size);
+ if (unlikely(err) && err != UBI_IO_BITFLIPS) {
+ ubi_warn("error %d while reading data from PEB %d",
+ err, e1->pnum);
+ goto data_read_error;
+ }
+
+ /*
+ * Now we have to calculate how much data to copy. In case of a static
+ * volume it is fairly easy - the VID header contains the data size. In
+ * case of a dynamic volume it is more difficult - we have to read the
+ * contents, cut the 0xFF bytes from the end and copy only the first
+ * part. We must do this to avoid writing 0xFF bytes as it may have
+ * side-effects. Moreover, it is important not to include those 0xFFs
+ * in the CRC because the user may later fill them with data!
+ */
+ if (vid_hdr->vol_type == UBI_VID_DYNAMIC)
+ aldata_size = data_size =
+ ubi_calc_data_len(ubi, buf, data_size);
+
+ cond_resched();
+ crc = crc32(UBI_CRC32_INIT, buf, data_size);
+
+ /*
+ * It may turn out that the whole @e1->pnum physical eraseblock
+ * contains only 0xFF bytes. Then we have to only write the VID header
+ * and do not write any data. This also means we should not set
+ * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc.
+ */
+ if (likely(data_size > 0)) {
+ vid_hdr->copy_flag = 1;
+ vid_hdr->data_size = cpu_to_ubi32(data_size);
+ vid_hdr->data_crc = cpu_to_ubi32(crc);
+ }
+ vid_hdr->leb_ver = cpu_to_ubi32(ubi32_to_cpu(vid_hdr->leb_ver) + 1);
+
+ cond_resched();
+ err = ubi_io_write_vid_hdr(ubi, e2->pnum, vid_hdr);
+ if (unlikely(err))
+ goto write_error;
+
+ /* Read the VID header back and check if it was written correctly */
+ cond_resched();
+ err = ubi_io_read_vid_hdr(ubi, e2->pnum, vid_hdr, 1);
+ if (unlikely(err)) {
+ if (err != UBI_IO_BITFLIPS) {
+ ubi_warn("cannot read VID header back from PEB %d", e2->pnum);
+ goto write_error;
+ }
+ goto bitflip;
+ }
+
+ if (likely(data_size > 0)) {
+ void *buf1;
+
+ err = ubi_io_write_data(ubi, buf, e2->pnum, 0, aldata_size);
+ if (unlikely(err))
+ goto write_error;
+
+ /*
+ * We've written the data and are going to read it back to make
+ * sure it was written correctly.
+ */
+ buf1 = ubi_kmalloc(aldata_size);
+ if (unlikely(!buf1)) {
+ err = -ENOMEM;
+ goto write_error;
+ }
+
+ cond_resched();
+ err = ubi_io_read_data(ubi, buf1, e2->pnum, 0, aldata_size);
+ if (unlikely(err)) {
+ ubi_kfree(buf1);
+ if (err != UBI_IO_BITFLIPS) {
+ ubi_warn("cannot read data back from PEB %d",
+ e2->pnum);
+ goto write_error;
+ }
+ goto bitflip;
+ }
+
+ cond_resched();
+ if (unlikely(memcmp(buf, buf1, aldata_size))) {
+ ubi_warn("read data back from PEB %d - it is different",
+ e2->pnum);
+ err = -EINVAL;
+ ubi_kfree(buf1);
+ goto write_error;
+ }
+ ubi_kfree(buf1);
+ }
+
+ /*
+ * Re-map the logical eraseblock to the new physical eraseblock
+ * (@e2->pnum).
+ */
+ ubi_eba_leb_remap(ubi, vol_id, lnum, e2->pnum);
+
+ /*
+ * The physical eraseblock was successfully copied and re-mapped. Add
+ * the new copy to the @wl->used tree and schedule the old one for
+ * erasure.
+ */
+ spin_lock(&wl->lock);
+ wl->erase_pending += 1;
+ used_tree_add(wl, e2);
+ wl->wl_scheduled = 0;
+ ubi_assert(wl->move);
+ wl->move = NULL;
+ spin_unlock(&wl->lock);
+
+ /* Unlock the logical eraseblock */
+ ubi_eba_leb_write_unlock(ubi, vol_id, lnum);
+
+ ubi_kfree(buf);
+ ubi_free_vid_hdr(ubi, vid_hdr);
+
+ /*
+ * Note, we do not check if more wear-leveling is needed here. We
+ * schedule the physical eraseblock for erasure so we know that the
+ * erase worker will take care of that.
+ */
+ err = schedule_erase(ubi, e1, 0);
+ if (unlikely(err)) {
+ /* This may only happen if there is no memory */
+ ubi_free_wl_entry(e1);
+ ubi_eba_ro_mode(ubi);
+ }
+ dbg_wl("done");
+ return err;
+
+out:
+ wl->wl_scheduled = 0;
+ spin_unlock(&wl->lock);
+ return 0;
+
+ /*
+ * The physical eraseblock we have selected was put. It was scheduled
+ * for erasure and removed from the @wl->used tree, so we only need to
+ * return @e2 back to the @wl->free tree.
+ */
+out_cancel_wl_unlock:
+ spin_unlock(&wl->lock);
+ ubi_eba_leb_write_unlock(ubi, vol_id, lnum);
+ ubi_kfree(buf);
+out_cancel_wl:
+ dbg_wl("PEB %d was put, don't move it, cancel", e1->pnum);
+ ubi_free_vid_hdr(ubi, vid_hdr);
+ spin_lock(&wl->lock);
+ ubi_assert(wl->move == NULL);
+ free_tree_add(wl, e2);
+ wl->wl_scheduled = 0;
+ spin_unlock(&wl->lock);
+ return 0;
+
+ /*
+ * Some non-I/O error occurred. Neither @e1 nor @e2 was changed; just
+ * put them back into the trees they were taken from.
+ */
+out_err_cancel_buf:
+ ubi_kfree(buf);
+out_err_cancel_vid_hdr:
+ ubi_free_vid_hdr(ubi, vid_hdr);
+out_err_cancel:
+ spin_lock(&wl->lock);
+ wl->wl_scheduled = 0;
+ if (wl->move) {
+ if (scrub)
+ scrub_tree_add(wl, e1);
+ else
+ used_tree_add(wl, e1);
+ wl->move = NULL;
+ }
+ free_tree_add(wl, e2);
+ spin_unlock(&wl->lock);
+ return err;
+
+ /*
+ * Failed to read from the @e1->pnum physical eraseblock. Something
+ * nasty happened. We don't want to move this physical eraseblock.
+ * We also don't want this physical eraseblock to be repeatedly
+ * selected for wear-leveling, so protect it.
+ *
+ * FIXME: It would be better to notify upper layers about this and let
+ * them handle this. But this is not implemented.
+ */
+data_read_error:
+ ubi_eba_leb_write_unlock(ubi, vol_id, lnum);
+ ubi_kfree(buf);
+vid_hdr_read_error:
+ ubi_free_vid_hdr(ubi, vid_hdr);
+ spin_lock(&wl->lock);
+ free_tree_add(wl, e2);
+ spin_unlock(&wl->lock);
+
+ pe = ubi_alloc_wl_prot_entry();
+ if (!pe) {
+ spin_lock(&wl->lock);
+ wl->wl_scheduled = 0;
+ if (wl->move)
+ used_tree_add(wl, e1);
+ wl->move = NULL;
+ spin_unlock(&wl->lock);
+ return -ENOMEM;
+ }
+ spin_lock(&wl->lock);
+ wl->wl_scheduled = 0;
+ if (wl->move) {
+ prot_tree_add(wl, e1, pe, ST_PROTECTION);
+ wl->move = NULL;
+ spin_unlock(&wl->lock);
+ } else {
+ spin_unlock(&wl->lock);
+ ubi_free_wl_prot_entry(pe);
+ }
+ return 0;
+
+ /*
+ * An error occurred during writing. Something was written to @e2->pnum,
+ * so we cannot treat it as free any longer. Put @e1 back to the
+ * @wl->used tree and schedule @e2->pnum for erasure.
+ *
+ * Normally, this happens if @e2->pnum went bad - then it will be
+ * handled in the erase function. But if the flash does not admit of
+ * bad physical eraseblocks, we switch to read-only mode.
+ */
+write_error:
+ ubi_kfree(buf);
+ ubi_free_vid_hdr(ubi, vid_hdr);
+
+ spin_lock(&wl->lock);
+ wl->wl_scheduled = 0;
+ ubi_assert(wl->move);
+ used_tree_add(wl, e1);
+ wl->move = NULL;
+ spin_unlock(&wl->lock);
+ ubi_eba_leb_write_unlock(ubi, vol_id, lnum);
+
+ if (ubi->io->bad_allowed) {
+ int err1;
+
+ spin_lock(&wl->lock);
+ wl->erase_pending += 1;
+ spin_unlock(&wl->lock);
+ err1 = schedule_erase(ubi, e2, 1);
+ if (err1) {
+ /* No memory - bad, switch to read-only mode */
+ ubi_free_wl_entry(e2);
+ spin_lock(&wl->lock);
+ wl->erase_pending -= 1;
+ spin_unlock(&wl->lock);
+ ubi_eba_ro_mode(ubi);
+ err = err1;
+ }
+ } else {
+ ubi_err("flash device may be severely bad");
+ ubi_free_wl_entry(e2);
+ ubi_eba_ro_mode(ubi);
+ }
+ return err;
+
+ /*
+ * We successfully wrote the data to @e2->pnum, but when we read it back
+ * we detected a bit-flip. So we cancel the operation.
+ */
+bitflip:
+ dbg_wl("bit-flip at the copied data, cancel");
+ ubi_kfree(buf);
+ ubi_free_vid_hdr(ubi, vid_hdr);
+
+ spin_lock(&wl->lock);
+ wl->wl_scheduled = 0;
+ ubi_assert(wl->move);
+ if (scrub)
+ scrub_tree_add(wl, e1);
+ else
+ used_tree_add(wl, e1);
+ wl->move = NULL;
+ spin_unlock(&wl->lock);
+ ubi_eba_leb_write_unlock(ubi, vol_id, lnum);
+
+ spin_lock(&wl->lock);
+ wl->erase_pending += 1;
+ spin_unlock(&wl->lock);
+ err = schedule_erase(ubi, e2, 0);
+ if (err) {
+ /* No memory - bad, switch to read-only mode */
+ ubi_free_wl_entry(e2);
+ spin_lock(&wl->lock);
+ wl->erase_pending -= 1;
+ spin_unlock(&wl->lock);
+ ubi_eba_ro_mode(ubi);
+ }
+
+ return err;
+}
+
+/**
+ * sync_erase - synchronously erase a physical eraseblock.
+ *
+ * @ubi: the UBI device description object
+ * @e: the physical eraseblock to erase
+ * @torture: if the physical eraseblock has to be tortured
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int sync_erase(const struct ubi_info *ubi, struct ubi_wl_entry *e,
+ int torture)
+{
+ int err;
+ struct ubi_ec_hdr *ec_hdr;
+ struct ubi_wl_info *wl = ubi->wl;
+ uint64_t ec = e->ec;
+
+ dbg_wl("erase PEB %d, old EC %llu", e->pnum, (unsigned long long)ec);
+
+ err = paranoid_check_ec(ubi, e->pnum, e->ec);
+ if (unlikely(err > 0))
+ return -EINVAL;
+
+ ec_hdr = ubi_zalloc_ec_hdr(ubi);
+ if (unlikely(!ec_hdr))
+ return -ENOMEM;
+
+ err = ubi_io_sync_erase(ubi, e->pnum, torture);
+ if (unlikely(err < 0))
+ goto out_free;
+
+ ec += err;
+ if (unlikely(ec > UBI_MAX_ERASECOUNTER)) {
+ /*
+ * Erase counter overflow. Upgrade UBI and use 64-bit
+ * erase counters internally.
+ */
+ ubi_err("erase counter overflow at PEB %d, EC %llu",
+ e->pnum, (unsigned long long)ec);
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ dbg_wl("erased PEB %d, new EC %llu", e->pnum, (unsigned long long)ec);
+
+ ec_hdr->ec = cpu_to_ubi64(ec);
+
+ err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
+ if (unlikely(err))
+ goto out_free;
+
+ e->ec = ec;
+ if (e->ec > wl->max_ec)
+ wl->max_ec = e->ec;
+
+out_free:
+ ubi_free_ec_hdr(ubi, ec_hdr);
+ return err;
+}
+
+/**
+ * produce_free - produce a free physical eraseblock.
+ *
+ * @ubi: the UBI device description object
+ *
+ * This function tries to make a free PEB by means of synchronous execution of
+ * pending works. This may be needed if, for example, the background thread is
+ * disabled. Returns zero in case of success and a negative error code in case
+ * of failure.
+ */
+static int produce_free(const struct ubi_info *ubi)
+{
+ int err;
+ struct ubi_wl_info *wl = ubi->wl;
+
+ spin_lock(&wl->lock);
+ while (tree_empty(&wl->free)) {
+ spin_unlock(&wl->lock);
+
+ dbg_wl("do one work synchronously");
+ err = ubi_bgt_do_work(ubi);
+ if (unlikely(err))
+ return err;
+
+ spin_lock(&wl->lock);
+ }
+ spin_unlock(&wl->lock);
+
+ return 0;
+}
+
+/**
+ * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
+ *
+ * @e: the wear-leveling entry to add
+ * @root: the root of the tree
+ *
+ * Note, we use (erase counter, physical eraseblock number) pairs as keys in
+ * the @wl->used and @wl->free RB-trees.
+ */
+static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
+{
+ struct rb_node **p, *parent = NULL;
+
+ p = &root->rb_node;
+ while (*p) {
+ struct ubi_wl_entry *e1;
+
+ parent = *p;
+ e1 = rb_entry(parent, struct ubi_wl_entry, rb);
+
+ if (e->ec < e1->ec)
+ p = &(*p)->rb_left;
+ else if (e->ec > e1->ec)
+ p = &(*p)->rb_right;
+ else {
+ ubi_assert(e->pnum != e1->pnum);
+ if (e->pnum < e1->pnum)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ }
+
+ rb_link_node(&e->rb, parent, p);
+ rb_insert_color(&e->rb, root);
+}
+
+/**
+ * in_wl_tree - check if a wear-leveling entry is present in a WL RB-tree.
+ *
+ * @e: the wear-leveling entry to check
+ * @root: the root of the tree
+ *
+ * This function returns non-zero if @e is in the @root RB-tree and zero if it
+ * is not.
+ */
+static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
+{
+ struct rb_node *p;
+
+ p = root->rb_node;
+ while (p) {
+ struct ubi_wl_entry *e1;
+
+ e1 = rb_entry(p, struct ubi_wl_entry, rb);
+
+ if (e->pnum == e1->pnum) {
+ ubi_assert(e == e1);
+ return 1;
+ }
+
+ if (e->ec < e1->ec)
+ p = p->rb_left;
+ else if (e->ec > e1->ec)
+ p = p->rb_right;
+ else {
+ ubi_assert(e->pnum != e1->pnum);
+ if (e->pnum < e1->pnum)
+ p = p->rb_left;
+ else
+ p = p->rb_right;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * tree_destroy - destroy an RB-tree.
+ *
+ * @root: the root of the tree to destroy
+ */
+static void tree_destroy(struct rb_root *root)
+{
+ struct rb_node *rb;
+ struct ubi_wl_entry *e;
+
+ rb = root->rb_node;
+ while (rb) {
+ if (rb->rb_left)
+ rb = rb->rb_left;
+ else if (rb->rb_right)
+ rb = rb->rb_right;
+ else {
+ e = rb_entry(rb, struct ubi_wl_entry, rb);
+
+ rb = rb_parent(rb);
+ if (rb) {
+ if (rb->rb_left == &e->rb)
+ rb->rb_left = NULL;
+ else
+ rb->rb_right = NULL;
+ }
+
+ ubi_free_wl_entry(e);
+ }
+ }
+}
+
+/**
+ * protection_trees_destroy - destroy the protection RB-trees.
+ *
+ * @wl: the wear-leveling unit description data structure
+ */
+static void protection_trees_destroy(struct ubi_wl_info *wl)
+{
+ struct rb_node *rb;
+ struct ubi_wl_prot_entry *pe;
+
+ rb = wl->prot.aec.rb_node;
+ while (rb) {
+ if (rb->rb_left)
+ rb = rb->rb_left;
+ else if (rb->rb_right)
+ rb = rb->rb_right;
+ else {
+ pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec);
+
+ rb = rb_parent(rb);
+ if (rb) {
+ if (rb->rb_left == &pe->rb_aec)
+ rb->rb_left = NULL;
+ else
+ rb->rb_right = NULL;
+ }
+
+ ubi_free_wl_entry(pe->e);
+ ubi_free_wl_prot_entry(pe);
+ }
+ }
+}
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID_WL
+
+/**
+ * paranoid_check_ec - make sure that the erase counter of a physical eraseblock
+ * is correct.
+ *
+ * @ubi: the UBI device description object
+ * @pnum: the physical eraseblock number to check
+ * @ec: the erase counter to check
+ *
+ * This function returns zero if the erase counter of physical eraseblock @pnum
+ * is equivalent to @ec, %1 if not, and a negative error code if an error
+ * occurred.
+ */
+static int paranoid_check_ec(const struct ubi_info *ubi, int pnum, int ec)
+{
+ int err;
+ long long read_ec;
+ struct ubi_ec_hdr *ec_hdr;
+
+ ec_hdr = ubi_zalloc_ec_hdr(ubi);
+ if (unlikely(!ec_hdr))
+ return -ENOMEM;
+
+ err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0);
+ if (unlikely(err) && err != UBI_IO_BITFLIPS) {
+ /* The header does not have to exist */
+ err = 0;
+ goto out_free;
+ }
+
+ read_ec = ubi64_to_cpu(ec_hdr->ec);
+ if (unlikely(ec != read_ec)) {
+ ubi_err("paranoid check failed for PEB %d", pnum);
+ ubi_err("read EC is %lld, should be %d", read_ec, ec);
+ ubi_dbg_dump_stack();
+ err = 1;
+ } else
+ err = 0;
+
+out_free:
+ ubi_free_ec_hdr(ubi, ec_hdr);
+ return err;
+}
+
+/**
+ * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present
+ * in a WL RB-tree.
+ *
+ * @e: the wear-leveling entry to check
+ * @root: the root of the tree
+ *
+ * This function returns zero if @e is in the @root RB-tree and %1 if it
+ * is not.
+ */
+static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
+{
+ if (likely(in_wl_tree(e, root)))
+ return 0;
+
+ ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ",
+ e->pnum, e->ec, root);
+ ubi_dbg_dump_stack();
+ return 1;
+}
+
+#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID_WL */