lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20140714191947.GP7537@ld-irv-0074>
Date:	Mon, 14 Jul 2014 12:19:47 -0700
From:	Brian Norris <computersforpeace@...il.com>
To:	Michael Grzeschik <m.grzeschik@...gutronix.de>
Cc:	linux-mtd@...ts.infradead.org, dwmw2@...radead.org,
	linux-kernel@...r.kernel.org, kernel@...gutronix.de
Subject: Re: [PATH v2] mxc_nand: use our own read_page function

Hi Michael,

On Fri, Jun 27, 2014 at 12:38:44PM +0200, Michael Grzeschik wrote:
> The current approach of the read_page function is to iterate over all
> subpages and call the correct_data function. The correct_data function
> currently does the same. It iterates over all subpages and checks for
> correctable and uncorrectable data. This redundant call for each
> subpage leads to miscalculations.

Hmm, you certainly do have some statistic bugs, but I'm not sure you're
solving this correctly.

> This patch changes the driver to use its own read_page function in which
> we call the correct_data function only once per page. With that we do
> the failure and correct statistics counting inside this function.
> 
> Signed-off-by: Michael Grzeschik <m.grzeschik@...gutronix.de>
> ---
> fixed printk to pr_debug
> 
>  drivers/mtd/nand/mxc_nand.c | 73 ++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 69 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
> index a72d508..5f9e36d 100644
> --- a/drivers/mtd/nand/mxc_nand.c
> +++ b/drivers/mtd/nand/mxc_nand.c
> @@ -141,6 +141,8 @@ struct mxc_nand_host;
>  
>  struct mxc_nand_devtype_data {
>  	void (*preset)(struct mtd_info *);
> +	int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip,
> +			uint8_t *buf, int oob_required, int page);
>  	void (*send_cmd)(struct mxc_nand_host *, uint16_t, int);
>  	void (*send_addr)(struct mxc_nand_host *, uint16_t, int);
>  	void (*send_page)(struct mtd_info *, unsigned int);
> @@ -649,6 +651,59 @@ static int mxc_nand_correct_data_v1(struct mtd_info *mtd, u_char *dat,
>  	return 0;
>  }
>  
> +/**
> + * mxc_nand_read_page_hwecc_v2_v3 - [REPLACEABLE] hardware ECC based page read function

Don't include the [REPLACEABLE] language here. That's mostly just used
for code like nand_base, where we provide some defaults / helpers that
may or may not be intended to allow overrides. (Not really the best
approach, IMO, but that's beside the point.) So don't mark this in your
low-level driver.

> + * @mtd: mtd info structure
> + * @chip: nand chip info structure
> + * @buf: buffer to store read data
> + * @oob_required: caller requires OOB data read to chip->oob_poi
> + * @page: page number to read
> + *
> + * Not for syndrome calculating ECC controllers which need a special oob layout.
> + */
> +static int

I don't think you want a line break here, to match the style of the rest
of the driver.

> +mxc_nand_read_page_hwecc_v2_v3(struct mtd_info *mtd,
> +			       struct nand_chip *chip,
> +			       uint8_t *buf, int oob_required, int page)
> +{
> +	int i, eccsize = chip->ecc.size;
> +	struct nand_chip *nand_chip = mtd->priv;
> +	struct mxc_nand_host *host = nand_chip->priv;
> +	int eccbytes = chip->ecc.bytes;
> +	int eccsteps = chip->ecc.steps;
> +	uint8_t *p = buf;
> +	uint8_t *ecc_calc = chip->buffers->ecccalc;
> +	uint8_t *ecc_code = chip->buffers->ecccode;
> +	uint32_t *eccpos = chip->ecc.layout->eccpos;
> +	unsigned int max_bitflips = 0;
> +	u32 ecc_stat, err;
> +	int stat;
> +
> +	ecc_stat = host->devtype_data->get_ecc_status(host);
> +	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
> +		err = ecc_stat & 0xf;

drivers/mtd/nand/mxc_nand.c: In function 'mxc_nand_read_page_hwecc_v2_v3':
drivers/mtd/nand/mxc_nand.c:679:16: warning: variable 'err' set but not used [-Wunused-but-set-variable]

Is that intentional?

> +		chip->ecc.hwctl(mtd, NAND_ECC_READ);
> +		chip->read_buf(mtd, p, eccsize);
> +		chip->ecc.calculate(mtd, p, &ecc_calc[i]);
> +		ecc_stat >>= 4;

You're also never using the value of ecc_stat, except to calculate other
values which are never used.

> +

Drop the extra blank line.

> +	}
> +	ecc_stat = host->devtype_data->get_ecc_status(host);

Result unused?

> +	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
> +
> +	for (i = 0; i < chip->ecc.total; i++)
> +		ecc_code[i] = chip->oob_poi[eccpos[i]];
> +
> +	eccsteps = chip->ecc.steps;
> +	p = buf;
> +
> +	stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);
> +	max_bitflips = max_t(unsigned int, max_bitflips, stat);

This is wrong. First, you probably don't want to cast 'stat' to
unsigned, in case it's an error. Second, absent an error, this just
resolves to:

	max_bitflips = stat;

So you're not actually determining the maximum per-sector bitflips,
you're just determining the total # of bitflips.

> +
> +	return max_bitflips;

So I think you have some leftover/unused code in this function

Additionally, I'm confused because your ecc.correct() function is now
hiding some of the stat counting -- this is contrary to its usage
elsewhere. See more comments below.

> +}
> +
> +
>  static int mxc_nand_correct_data_v2_v3(struct mtd_info *mtd, u_char *dat,
>  				 u_char *read_ecc, u_char *calc_ecc)
>  {
> @@ -656,7 +711,7 @@ static int mxc_nand_correct_data_v2_v3(struct mtd_info *mtd, u_char *dat,
>  	struct mxc_nand_host *host = nand_chip->priv;
>  	u32 ecc_stat, err;
>  	int no_subpages = 1;
> -	int ret = 0;
> +	int ret = 0, broken = 0;
>  	u8 ecc_bit_mask, err_limit = 0x1;
>  
>  	ecc_bit_mask = 0xf;
> @@ -673,15 +728,21 @@ static int mxc_nand_correct_data_v2_v3(struct mtd_info *mtd, u_char *dat,
>  	do {
>  		err = ecc_stat & ecc_bit_mask;
>  		if (err > err_limit) {
> -			printk(KERN_WARNING "UnCorrectable RS-ECC Error\n");
> -			return -1;
> +			broken++;
>  		} else {
>  			ret += err;
>  		}
>  		ecc_stat >>= 4;
>  	} while (--no_subpages);
>  
> -	pr_debug("%d Symbol Correctable RS-ECC Error\n", ret);
> +	mtd->ecc_stats.corrected += ret;
> +	if (ret)
> +		pr_debug("%d Symbol Correctable RS-ECC Error\n", ret);
> +
> +	mtd->ecc_stats.failed += broken;
> +	if (broken)
> +		printk(KERN_WARNING "%d Symbol UnCorrectable RS-ECC Error\n",
> +			 broken);

This is wrong; either this function should not be assigned to
ecc.correct(), or else it should not be modifying the ecc_stats. See all
other examples of ecc.correct() callbacks. Additionally, it should be
returning negative to indicate ECC failure, which you're not doing.

One solution: stop don't assign an chip->ecc.correct() callback any
more, so that your correction logic is encapsulated entirely within
chip->read_page(). You would need to make mxc_nand_read_page_hwecc_v2_v3
call mxc_nand_correct_data_v2_v3() directly, of course. (And you could
even try a BUG() whenever chip->ecc.correct() is called, like
cafe_nand.c does.)

And in fact, I'd just take this one step further; kill
mxc_nand_correct_data_v2_v3() and just merge it with
mxc_nand_read_page_hwecc_v2_v3(). Then you can count bitflips (resolving
the 'max_bitflips' issue I pointed out above) all in one place.

>  
>  	return ret;
>  }
> @@ -1216,6 +1277,7 @@ static const struct mxc_nand_devtype_data imx27_nand_devtype_data = {
>  /* v21: i.MX25, i.MX35 */
>  static const struct mxc_nand_devtype_data imx25_nand_devtype_data = {
>  	.preset = preset_v2,
> +	.read_page = mxc_nand_read_page_hwecc_v2_v3,
>  	.send_cmd = send_cmd_v1_v2,
>  	.send_addr = send_addr_v1_v2,
>  	.send_page = send_page_v2,
> @@ -1242,6 +1304,7 @@ static const struct mxc_nand_devtype_data imx25_nand_devtype_data = {
>  /* v3.2a: i.MX51 */
>  static const struct mxc_nand_devtype_data imx51_nand_devtype_data = {
>  	.preset = preset_v3,
> +	.read_page = mxc_nand_read_page_hwecc_v2_v3,
>  	.send_cmd = send_cmd_v3,
>  	.send_addr = send_addr_v3,
>  	.send_page = send_page_v3,
> @@ -1269,6 +1332,7 @@ static const struct mxc_nand_devtype_data imx51_nand_devtype_data = {
>  /* v3.2b: i.MX53 */
>  static const struct mxc_nand_devtype_data imx53_nand_devtype_data = {
>  	.preset = preset_v3,
> +	.read_page = mxc_nand_read_page_hwecc_v2_v3,
>  	.send_cmd = send_cmd_v3,
>  	.send_addr = send_addr_v3,
>  	.send_page = send_page_v3,
> @@ -1483,6 +1547,7 @@ static int mxcnd_probe(struct platform_device *pdev)
>  	this->ecc.layout = host->devtype_data->ecclayout_512;
>  
>  	if (host->pdata.hw_ecc) {
> +		this->ecc.read_page = host->devtype_data->read_page;
>  		this->ecc.calculate = mxc_nand_calculate_ecc;
>  		this->ecc.hwctl = mxc_nand_enable_hwecc;
>  		this->ecc.correct = host->devtype_data->correct_data;

Brian
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ