[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20140714191947.GP7537@ld-irv-0074>
Date: Mon, 14 Jul 2014 12:19:47 -0700
From: Brian Norris <computersforpeace@...il.com>
To: Michael Grzeschik <m.grzeschik@...gutronix.de>
Cc: linux-mtd@...ts.infradead.org, dwmw2@...radead.org,
linux-kernel@...r.kernel.org, kernel@...gutronix.de
Subject: Re: [PATH v2] mxc_nand: use our own read_page function
Hi Michael,
On Fri, Jun 27, 2014 at 12:38:44PM +0200, Michael Grzeschik wrote:
> The current approach of the read_page function is to iterate over all
> subpages and call the correct_data function. The correct_data function
> currently does the same. It iterates over all subpages and checks for
> correctable and uncorrectable data. This redundant call for each
> subpage leads to miscalculations.
Hmm, you certainly do have some statistic bugs, but I'm not sure you're
solving this correctly.
> This patch changes the driver to use its own read_page function in which
> we call the correct_data function only once per page. With that we do
> the failure and correct statistics counting inside this function.
>
> Signed-off-by: Michael Grzeschik <m.grzeschik@...gutronix.de>
> ---
> fixed printk to pr_debug
>
> drivers/mtd/nand/mxc_nand.c | 73 ++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 69 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
> index a72d508..5f9e36d 100644
> --- a/drivers/mtd/nand/mxc_nand.c
> +++ b/drivers/mtd/nand/mxc_nand.c
> @@ -141,6 +141,8 @@ struct mxc_nand_host;
>
> struct mxc_nand_devtype_data {
> void (*preset)(struct mtd_info *);
> + int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip,
> + uint8_t *buf, int oob_required, int page);
> void (*send_cmd)(struct mxc_nand_host *, uint16_t, int);
> void (*send_addr)(struct mxc_nand_host *, uint16_t, int);
> void (*send_page)(struct mtd_info *, unsigned int);
> @@ -649,6 +651,59 @@ static int mxc_nand_correct_data_v1(struct mtd_info *mtd, u_char *dat,
> return 0;
> }
>
> +/**
> + * mxc_nand_read_page_hwecc_v2_v3 - [REPLACEABLE] hardware ECC based page read function
Don't include the [REPLACEABLE] language here. That's mostly just used
for code like nand_base, where we provide some defaults / helpers that
may or may not be intended to allow overrides. (Not really the best
approach, IMO, but that's beside the point.) So don't mark this in your
low-level driver.
> + * @mtd: mtd info structure
> + * @chip: nand chip info structure
> + * @buf: buffer to store read data
> + * @oob_required: caller requires OOB data read to chip->oob_poi
> + * @page: page number to read
> + *
> + * Not for syndrome calculating ECC controllers which need a special oob layout.
> + */
> +static int
I don't think you want a line break here, to match the style of the rest
of the driver.
> +mxc_nand_read_page_hwecc_v2_v3(struct mtd_info *mtd,
> + struct nand_chip *chip,
> + uint8_t *buf, int oob_required, int page)
> +{
> + int i, eccsize = chip->ecc.size;
> + struct nand_chip *nand_chip = mtd->priv;
> + struct mxc_nand_host *host = nand_chip->priv;
> + int eccbytes = chip->ecc.bytes;
> + int eccsteps = chip->ecc.steps;
> + uint8_t *p = buf;
> + uint8_t *ecc_calc = chip->buffers->ecccalc;
> + uint8_t *ecc_code = chip->buffers->ecccode;
> + uint32_t *eccpos = chip->ecc.layout->eccpos;
> + unsigned int max_bitflips = 0;
> + u32 ecc_stat, err;
> + int stat;
> +
> + ecc_stat = host->devtype_data->get_ecc_status(host);
> + for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
> + err = ecc_stat & 0xf;
drivers/mtd/nand/mxc_nand.c: In function 'mxc_nand_read_page_hwecc_v2_v3':
drivers/mtd/nand/mxc_nand.c:679:16: warning: variable 'err' set but not used [-Wunused-but-set-variable]
Is that intentional?
> + chip->ecc.hwctl(mtd, NAND_ECC_READ);
> + chip->read_buf(mtd, p, eccsize);
> + chip->ecc.calculate(mtd, p, &ecc_calc[i]);
> + ecc_stat >>= 4;
You're also never using the value of ecc_stat, except to calculate other
values which are never used.
> +
Drop the extra blank line.
> + }
> + ecc_stat = host->devtype_data->get_ecc_status(host);
Result unused?
> + chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
> +
> + for (i = 0; i < chip->ecc.total; i++)
> + ecc_code[i] = chip->oob_poi[eccpos[i]];
> +
> + eccsteps = chip->ecc.steps;
> + p = buf;
> +
> + stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);
> + max_bitflips = max_t(unsigned int, max_bitflips, stat);
This is wrong. First, you probably don't want to cast 'stat' to
unsigned, in case it's an error. Second, absent an error, this just
resolves to:
max_bitflips = stat;
So you're not actually determining the maximum per-sector bitflips,
you're just determining the total # of bitflips.
> +
> + return max_bitflips;
So I think you have some leftover/unused code in this function
Additionally, I'm confused because your ecc.correct() function is now
hiding some of the stat counting -- this is contrary to its usage
elsewhere. See more comments below.
> +}
> +
> +
> static int mxc_nand_correct_data_v2_v3(struct mtd_info *mtd, u_char *dat,
> u_char *read_ecc, u_char *calc_ecc)
> {
> @@ -656,7 +711,7 @@ static int mxc_nand_correct_data_v2_v3(struct mtd_info *mtd, u_char *dat,
> struct mxc_nand_host *host = nand_chip->priv;
> u32 ecc_stat, err;
> int no_subpages = 1;
> - int ret = 0;
> + int ret = 0, broken = 0;
> u8 ecc_bit_mask, err_limit = 0x1;
>
> ecc_bit_mask = 0xf;
> @@ -673,15 +728,21 @@ static int mxc_nand_correct_data_v2_v3(struct mtd_info *mtd, u_char *dat,
> do {
> err = ecc_stat & ecc_bit_mask;
> if (err > err_limit) {
> - printk(KERN_WARNING "UnCorrectable RS-ECC Error\n");
> - return -1;
> + broken++;
> } else {
> ret += err;
> }
> ecc_stat >>= 4;
> } while (--no_subpages);
>
> - pr_debug("%d Symbol Correctable RS-ECC Error\n", ret);
> + mtd->ecc_stats.corrected += ret;
> + if (ret)
> + pr_debug("%d Symbol Correctable RS-ECC Error\n", ret);
> +
> + mtd->ecc_stats.failed += broken;
> + if (broken)
> + printk(KERN_WARNING "%d Symbol UnCorrectable RS-ECC Error\n",
> + broken);
This is wrong; either this function should not be assigned to
ecc.correct(), or else it should not be modifying the ecc_stats. See all
other examples of ecc.correct() callbacks. Additionally, it should be
returning negative to indicate ECC failure, which you're not doing.
One solution: stop don't assign an chip->ecc.correct() callback any
more, so that your correction logic is encapsulated entirely within
chip->read_page(). You would need to make mxc_nand_read_page_hwecc_v2_v3
call mxc_nand_correct_data_v2_v3() directly, of course. (And you could
even try a BUG() whenever chip->ecc.correct() is called, like
cafe_nand.c does.)
And in fact, I'd just take this one step further; kill
mxc_nand_correct_data_v2_v3() and just merge it with
mxc_nand_read_page_hwecc_v2_v3(). Then you can count bitflips (resolving
the 'max_bitflips' issue I pointed out above) all in one place.
>
> return ret;
> }
> @@ -1216,6 +1277,7 @@ static const struct mxc_nand_devtype_data imx27_nand_devtype_data = {
> /* v21: i.MX25, i.MX35 */
> static const struct mxc_nand_devtype_data imx25_nand_devtype_data = {
> .preset = preset_v2,
> + .read_page = mxc_nand_read_page_hwecc_v2_v3,
> .send_cmd = send_cmd_v1_v2,
> .send_addr = send_addr_v1_v2,
> .send_page = send_page_v2,
> @@ -1242,6 +1304,7 @@ static const struct mxc_nand_devtype_data imx25_nand_devtype_data = {
> /* v3.2a: i.MX51 */
> static const struct mxc_nand_devtype_data imx51_nand_devtype_data = {
> .preset = preset_v3,
> + .read_page = mxc_nand_read_page_hwecc_v2_v3,
> .send_cmd = send_cmd_v3,
> .send_addr = send_addr_v3,
> .send_page = send_page_v3,
> @@ -1269,6 +1332,7 @@ static const struct mxc_nand_devtype_data imx51_nand_devtype_data = {
> /* v3.2b: i.MX53 */
> static const struct mxc_nand_devtype_data imx53_nand_devtype_data = {
> .preset = preset_v3,
> + .read_page = mxc_nand_read_page_hwecc_v2_v3,
> .send_cmd = send_cmd_v3,
> .send_addr = send_addr_v3,
> .send_page = send_page_v3,
> @@ -1483,6 +1547,7 @@ static int mxcnd_probe(struct platform_device *pdev)
> this->ecc.layout = host->devtype_data->ecclayout_512;
>
> if (host->pdata.hw_ecc) {
> + this->ecc.read_page = host->devtype_data->read_page;
> this->ecc.calculate = mxc_nand_calculate_ecc;
> this->ecc.hwctl = mxc_nand_enable_hwecc;
> this->ecc.correct = host->devtype_data->correct_data;
Brian
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists