lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0ec477b5-e404-536f-ff60-39f43208c3cc@kaod.org>
Date:   Wed, 30 Mar 2022 14:14:40 +0200
From:   Cédric Le Goater <clg@...d.org>
To:     Chin-Ting Kuo <chin-ting_kuo@...eedtech.com>,
        "linux-spi@...r.kernel.org" <linux-spi@...r.kernel.org>,
        "linux-mtd@...ts.infradead.org" <linux-mtd@...ts.infradead.org>
CC:     Mark Brown <broonie@...nel.org>,
        Tudor Ambarus <tudor.ambarus@...rochip.com>,
        Pratyush Yadav <p.yadav@...com>,
        Miquel Raynal <miquel.raynal@...tlin.com>,
        Richard Weinberger <richard@....at>,
        Vignesh Raghavendra <vigneshr@...com>,
        "linux-aspeed@...ts.ozlabs.org" <linux-aspeed@...ts.ozlabs.org>,
        Joel Stanley <joel@....id.au>,
        Andrew Jeffery <andrew@...id.au>,
        "devicetree@...r.kernel.org" <devicetree@...r.kernel.org>,
        Rob Herring <robh+dt@...nel.org>,
        "linux-arm-kernel@...ts.infradead.org" 
        <linux-arm-kernel@...ts.infradead.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        Tao Ren <rentao.bupt@...il.com>
Subject: Re: [PATCH v4 08/11] spi: aspeed: Calibrate read timings

On 3/30/22 13:53, Chin-Ting Kuo wrote:
> Hi Cédric Le,
> 
>> -----Original Message-----
>> From: Cédric Le Goater <clg@...d.org>
>> Sent: Friday, March 25, 2022 6:09 PM
>> To: linux-spi@...r.kernel.org; linux-mtd@...ts.infradead.org
>> Subject: [PATCH v4 08/11] spi: aspeed: Calibrate read timings
>>
>> To accommodate the different response time of SPI transfers on different
>> boards and different SPI NOR devices, the Aspeed controllers provide a set of
>> Read Timing Compensation registers to tune the timing delays depending on
>> the frequency being used. The AST2600 SoC has one of these registers per
>> device. On the AST2500 and AST2400 SoCs, the timing register is shared by all
>> devices which is problematic to get good results other than for one device.
>>
>> The algorithm first reads a golden buffer at low speed and then performs reads
>> with different clocks and delay cycle settings to find a breaking point. This
>> selects a default good frequency for the CEx control register.
>> The current settings are a bit optimistic as we pick the first delay giving good
>> results. A safer approach would be to determine an interval and choose the
>> middle value.
>>
>> Calibration is performed when the direct mapping for reads is created.
>> Since the underlying spi-nor object needs to be initialized to create the
>> spi_mem operation for direct mapping, we should be fine. Having a specific
>> API would clarify the requirements though.
>>
>> Cc: Pratyush Yadav <p.yadav@...com>
>> Reviewed-by: Joel Stanley <joel@....id.au>
>> Tested-by: Joel Stanley <joel@....id.au>
>> Tested-by: Tao Ren <rentao.bupt@...il.com>
>> Signed-off-by: Cédric Le Goater <clg@...d.org>
>> ---
>>   drivers/spi/spi-aspeed-smc.c | 281
>> +++++++++++++++++++++++++++++++++++
>>   1 file changed, 281 insertions(+)
>>
>> diff --git a/drivers/spi/spi-aspeed-smc.c b/drivers/spi/spi-aspeed-smc.c index
>> 7f306da7c44e..660451667a39 100644
>> --- a/drivers/spi/spi-aspeed-smc.c
>> +++ b/drivers/spi/spi-aspeed-smc.c
>> @@ -33,6 +33,8 @@
>>   #define   CTRL_IO_ADDRESS_4B		BIT(13)	/* AST2400 SPI only */
>>   #define   CTRL_IO_DUMMY_SET(dummy)					\
>>   	(((((dummy) >> 2) & 0x1) << 14) | (((dummy) & 0x3) << 6))
>> +#define   CTRL_FREQ_SEL_SHIFT		8
>> +#define   CTRL_FREQ_SEL_MASK		GENMASK(11,
>> CTRL_FREQ_SEL_SHIFT)
>>   #define   CTRL_CE_STOP_ACTIVE		BIT(2)
>>   #define   CTRL_IO_MODE_CMD_MASK		GENMASK(1, 0)
>>   #define   CTRL_IO_MODE_NORMAL		0x0
>> @@ -45,6 +47,9 @@
>>   /* CEx Address Decoding Range Register */
>>   #define CE0_SEGMENT_ADDR_REG		0x30
>>
>> +/* CEx Read timing compensation register */
>> +#define CE0_TIMING_COMPENSATION_REG	0x94
>> +
>>   enum aspeed_spi_ctl_reg_value {
>>   	ASPEED_SPI_BASE,
>>   	ASPEED_SPI_READ,
>> @@ -70,10 +75,15 @@ struct aspeed_spi_data {
>>   	bool	hastype;
>>   	u32	mode_bits;
>>   	u32	we0;
>> +	u32	timing;
>> +	u32	hclk_mask;
>> +	u32	hdiv_max;
>>
>>   	u32 (*segment_start)(struct aspeed_spi *aspi, u32 reg);
>>   	u32 (*segment_end)(struct aspeed_spi *aspi, u32 reg);
>>   	u32 (*segment_reg)(struct aspeed_spi *aspi, u32 start, u32 end);
>> +	int (*calibrate)(struct aspeed_spi_chip *chip, u32 hdiv,
>> +			 const u8 *golden_buf, u8 *test_buf);
>>   };
>>
>>   #define ASPEED_SPI_MAX_NUM_CS	5
>> @@ -517,6 +527,8 @@ static int aspeed_spi_chip_adjust_window(struct
>> aspeed_spi_chip *chip,
>>   	return 0;
>>   }
>>
>> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip);
>> +
>>   static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)  {
>>   	struct aspeed_spi *aspi =
>> spi_controller_get_devdata(desc->mem->spi->master);
>> @@ -565,6 +577,8 @@ static int aspeed_spi_dirmap_create(struct
>> spi_mem_dirmap_desc *desc)
>>   	chip->ctl_val[ASPEED_SPI_READ] = ctl_val;
>>   	writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
>>
>> +	ret = aspeed_spi_do_calibration(chip);
>> +
>>   	dev_info(aspi->dev, "CE%d read buswidth:%d [0x%08x]\n",
>>   		 chip->cs, op->data.buswidth, chip->ctl_val[ASPEED_SPI_READ]);
>>
>> @@ -812,6 +826,249 @@ static u32 aspeed_spi_segment_ast2600_reg(struct
>> aspeed_spi *aspi,
>>   		((end - 1) & AST2600_SEG_ADDR_MASK);
>>   }
>>
>> +/*
>> + * Read timing compensation sequences
>> + */
>> +
>> +#define CALIBRATE_BUF_SIZE SZ_16K
>> +
>> +static bool aspeed_spi_check_reads(struct aspeed_spi_chip *chip,
>> +				   const u8 *golden_buf, u8 *test_buf) {
>> +	int i;
>> +
>> +	for (i = 0; i < 10; i++) {
>> +		memcpy_fromio(test_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
>> +		if (memcmp(test_buf, golden_buf, CALIBRATE_BUF_SIZE) != 0) { #if
>> +defined(VERBOSE_DEBUG)
>> +			print_hex_dump_bytes(DEVICE_NAME "  fail: ",
>> DUMP_PREFIX_NONE,
>> +					     test_buf, 0x100);
>> +#endif
>> +			return false;
>> +		}
>> +	}
>> +	return true;
>> +}
>> +
>> +#define FREAD_TPASS(i)	(((i) / 2) | (((i) & 1) ? 0 : 8))
>> +
>> +/*
>> + * The timing register is shared by all devices. Only update for CE0.
>> + */
>> +static int aspeed_spi_calibrate(struct aspeed_spi_chip *chip, u32 hdiv,
>> +				const u8 *golden_buf, u8 *test_buf) {
>> +	struct aspeed_spi *aspi = chip->aspi;
>> +	const struct aspeed_spi_data *data = aspi->data;
>> +	int i;
>> +	int good_pass = -1, pass_count = 0;
>> +	u32 shift = (hdiv - 1) << 2;
>> +	u32 mask = ~(0xfu << shift);
>> +	u32 fread_timing_val = 0;
>> +
>> +	/* Try HCLK delay 0..5, each one with/without delay and look for a
>> +	 * good pair.
>> +	 */
>> +	for (i = 0; i < 12; i++) {
>> +		bool pass;
>> +
>> +		if (chip->cs == 0) {
>> +			fread_timing_val &= mask;
>> +			fread_timing_val |= FREAD_TPASS(i) << shift;
>> +			writel(fread_timing_val, aspi->regs + data->timing);
>> +		}
>> +		pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
>> +		dev_dbg(aspi->dev,
>> +			"  * [%08x] %d HCLK delay, %dns DI delay : %s",
>> +			fread_timing_val, i / 2, (i & 1) ? 0 : 4,
>> +			pass ? "PASS" : "FAIL");
>> +		if (pass) {
>> +			pass_count++;
>> +			if (pass_count == 3) {
>> +				good_pass = i - 1;
>> +				break;
>> +			}
>> +		} else {
>> +			pass_count = 0;
>> +		}
>> +	}
>> +
>> +	/* No good setting for this frequency */
>> +	if (good_pass < 0)
>> +		return -1;
>> +
>> +	/* We have at least one pass of margin, let's use first pass */
>> +	if (chip->cs == 0) {
>> +		fread_timing_val &= mask;
>> +		fread_timing_val |= FREAD_TPASS(good_pass) << shift;
>> +		writel(fread_timing_val, aspi->regs + data->timing);
>> +	}
>> +	dev_dbg(aspi->dev, " * -> good is pass %d [0x%08x]",
>> +		good_pass, fread_timing_val);
>> +	return 0;
>> +}
>> +
>> +static bool aspeed_spi_check_calib_data(const u8 *test_buf, u32 size) {
>> +	const u32 *tb32 = (const u32 *)test_buf;
>> +	u32 i, cnt = 0;
>> +
>> +	/* We check if we have enough words that are neither all 0
>> +	 * nor all 1's so the calibration can be considered valid.
>> +	 *
>> +	 * I use an arbitrary threshold for now of 64
>> +	 */
>> +	size >>= 2;
>> +	for (i = 0; i < size; i++) {
>> +		if (tb32[i] != 0 && tb32[i] != 0xffffffff)
>> +			cnt++;
>> +	}
>> +	return cnt >= 64;
>> +}
>> +
>> +static const u32 aspeed_spi_hclk_divs[] = {
>> +	0xf, /* HCLK */
>> +	0x7, /* HCLK/2 */
>> +	0xe, /* HCLK/3 */
>> +	0x6, /* HCLK/4 */
>> +	0xd, /* HCLK/5 */
>> +};
>> +
>> +#define ASPEED_SPI_HCLK_DIV(i) \
>> +	(aspeed_spi_hclk_divs[(i) - 1] << CTRL_FREQ_SEL_SHIFT)
>> +
>> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip) {
>> +	struct aspeed_spi *aspi = chip->aspi;
>> +	const struct aspeed_spi_data *data = aspi->data;
>> +	u32 ahb_freq = aspi->clk_freq;
>> +	u32 max_freq = chip->clk_freq;
>> +	u32 ctl_val;
>> +	u8 *golden_buf = NULL;
>> +	u8 *test_buf = NULL;
>> +	int i, rc, best_div = -1;
>> +
>> +	dev_dbg(aspi->dev, "calculate timing compensation - AHB freq: %d MHz",
>> +		ahb_freq / 1000000);
>> +
>> +	/*
>> +	 * use the related low frequency to get check calibration data
>> +	 * and get golden data.
>> +	 */
>> +	ctl_val = chip->ctl_val[ASPEED_SPI_READ] & data->hclk_mask;
>> +	writel(ctl_val, chip->ctl);
>> +
>> +	test_buf = kzalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL);
>> +	if (!test_buf)
>> +		return -ENOMEM;
>> +
>> +	golden_buf = test_buf + CALIBRATE_BUF_SIZE;
>> +
>> +	memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
>> +	if (!aspeed_spi_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) {
>> +		dev_info(aspi->dev, "Calibration area too uniform, using low speed");
>> +		goto no_calib;
>> +	}
>> +
>> +#if defined(VERBOSE_DEBUG)
>> +	print_hex_dump_bytes(DEVICE_NAME "  good: ", DUMP_PREFIX_NONE,
>> +			     golden_buf, 0x100);
>> +#endif
>> +
>> +	/* Now we iterate the HCLK dividers until we find our breaking point */
>> +	for (i = ARRAY_SIZE(aspeed_spi_hclk_divs); i > data->hdiv_max - 1; i--) {
>> +		u32 tv, freq;
>> +
>> +		freq = ahb_freq / i;
>> +		if (freq > max_freq)
>> +			continue;
>> +
>> +		/* Set the timing */
>> +		tv = chip->ctl_val[ASPEED_SPI_READ] | ASPEED_SPI_HCLK_DIV(i);
>> +		writel(tv, chip->ctl);
>> +		dev_dbg(aspi->dev, "Trying HCLK/%d [%08x] ...", i, tv);
>> +		rc = data->calibrate(chip, i, golden_buf, test_buf);
>> +		if (rc == 0)
>> +			best_div = i;
>> +	}
>> +
>> +	/* Nothing found ? */
>> +	if (best_div < 0) {
>> +		dev_warn(aspi->dev, "No good frequency, using dumb slow");
>> +	} else {
>> +		dev_dbg(aspi->dev, "Found good read timings at HCLK/%d",
>> best_div);
>> +
>> +		/* Record the freq */
>> +		for (i = 0; i < ASPEED_SPI_MAX; i++)
>> +			chip->ctl_val[i] = (chip->ctl_val[i] & data->hclk_mask) |
>> +				ASPEED_SPI_HCLK_DIV(best_div);
>> +	}
>> +
>> +no_calib:
> 
> - Maybe, if the calibration process is not executed, the frequency setting calculated from max_frequency in the device tree can be filled in FMC10 instead of using dumb slow one, 12.5MHz, always.

Indeed.

>   For example, except for uniform content case, the calibration process will be ignored when SPI clock frequency in the device tree is smaller than 40MHz.
> - The function, aspeed_2600_spi_clk_basic_setting, in [2] can be added to support lower SPI clock frequency, e.g., 4MHz.
>   For AST2600, SPI clock frequency can be calculated by HCLK/(FMC10[27:24] + FMC10[11:8]).

Could you please send patches on top of this series ? Here are the branches :

   https://github.com/legoater/linux/commits/openbmc-5.15
   https://github.com/legoater/linux/commits/aspeed         (mainline)

I can include them when I resend a v5.

Thanks,

C.


> 
>> +	writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
>> +	kfree(test_buf);
>> +	return 0;
>> +}
>> +
>> +#define TIMING_DELAY_DI		BIT(3)
>> +#define TIMING_DELAY_HCYCLE_MAX	5
>> +#define TIMING_REG_AST2600(chip)				\
>> +	((chip)->aspi->regs + (chip)->aspi->data->timing +	\
>> +	 (chip)->cs * 4)
>> +
>> +static int aspeed_spi_ast2600_calibrate(struct aspeed_spi_chip *chip, u32
>> hdiv,
>> +					const u8 *golden_buf, u8 *test_buf) {
>> +	struct aspeed_spi *aspi = chip->aspi;
>> +	int hcycle;
>> +	u32 shift = (hdiv - 2) << 3;
>> +	u32 mask = ~(0xfu << shift);
>> +	u32 fread_timing_val = 0;
>> +
>> +	for (hcycle = 0; hcycle <= TIMING_DELAY_HCYCLE_MAX; hcycle++) {
>> +		int delay_ns;
>> +		bool pass = false;
>> +
>> +		fread_timing_val &= mask;
>> +		fread_timing_val |= hcycle << shift;
>> +
>> +		/* no DI input delay first  */
>> +		writel(fread_timing_val, TIMING_REG_AST2600(chip));
>> +		pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
>> +		dev_dbg(aspi->dev,
>> +			"  * [%08x] %d HCLK delay, DI delay none : %s",
>> +			fread_timing_val, hcycle, pass ? "PASS" : "FAIL");
>> +		if (pass)
>> +			return 0;
>> +
>> +		/* Add DI input delays  */
>> +		fread_timing_val &= mask;
>> +		fread_timing_val |= (TIMING_DELAY_DI | hcycle) << shift;
>> +
>> +		for (delay_ns = 0; delay_ns < 0x10; delay_ns++) {
>> +			fread_timing_val &= ~(0xf << (4 + shift));
>> +			fread_timing_val |= delay_ns << (4 + shift);
>> +
>> +			writel(fread_timing_val, TIMING_REG_AST2600(chip));
>> +			pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
>> +			dev_dbg(aspi->dev,
>> +				"  * [%08x] %d HCLK delay, DI delay %d.%dns : %s",
>> +				fread_timing_val, hcycle, (delay_ns + 1) / 2,
>> +				(delay_ns + 1) & 1 ? 5 : 5, pass ? "PASS" : "FAIL");
>> +			/*
>> +			 * TODO: This is optimistic. We should look
>> +			 * for a working interval and save the middle
>> +			 * value in the read timing register.
>> +			 */
>> +			if (pass)
>> +				return 0;
>> +		}
>> +	}
>> +
>> +	/* No good setting for this frequency */
>> +	return -1;
>> +}
>> +
>>   /*
>>    * Platform definitions
>>    */
>> @@ -820,6 +1077,10 @@ static const struct aspeed_spi_data
>> ast2400_fmc_data = {
>>   	.hastype       = true,
>>   	.we0	       = 16,
>>   	.ctl0	       = CE0_CTRL_REG,
>> +	.timing	       = CE0_TIMING_COMPENSATION_REG,
>> +	.hclk_mask     = 0xfffff0ff,
>> +	.hdiv_max      = 1,
>> +	.calibrate     = aspeed_spi_calibrate,
>>   	.segment_start = aspeed_spi_segment_start,
>>   	.segment_end   = aspeed_spi_segment_end,
>>   	.segment_reg   = aspeed_spi_segment_reg,
>> @@ -830,6 +1091,10 @@ static const struct aspeed_spi_data
>> ast2400_spi_data = {
>>   	.hastype       = false,
>>   	.we0	       = 0,
>>   	.ctl0	       = 0x04,
>> +	.timing	       = 0x14,
>> +	.hclk_mask     = 0xfffff0ff,
>> +	.hdiv_max      = 1,
>> +	.calibrate     = aspeed_spi_calibrate,
>>   	/* No segment registers */
>>   };
>>
>> @@ -838,6 +1103,10 @@ static const struct aspeed_spi_data
>> ast2500_fmc_data = {
>>   	.hastype       = true,
>>   	.we0	       = 16,
>>   	.ctl0	       = CE0_CTRL_REG,
>> +	.timing	       = CE0_TIMING_COMPENSATION_REG,
>> +	.hclk_mask     = 0xfffff0ff,
>> +	.hdiv_max      = 1,
>> +	.calibrate     = aspeed_spi_calibrate,
>>   	.segment_start = aspeed_spi_segment_start,
>>   	.segment_end   = aspeed_spi_segment_end,
>>   	.segment_reg   = aspeed_spi_segment_reg,
>> @@ -848,6 +1117,10 @@ static const struct aspeed_spi_data
>> ast2500_spi_data = {
>>   	.hastype       = false,
>>   	.we0	       = 16,
>>   	.ctl0	       = CE0_CTRL_REG,
>> +	.timing	       = CE0_TIMING_COMPENSATION_REG,
>> +	.hclk_mask     = 0xfffff0ff,
>> +	.hdiv_max      = 1,
>> +	.calibrate     = aspeed_spi_calibrate,
>>   	.segment_start = aspeed_spi_segment_start,
>>   	.segment_end   = aspeed_spi_segment_end,
>>   	.segment_reg   = aspeed_spi_segment_reg,
>> @@ -859,6 +1132,10 @@ static const struct aspeed_spi_data
>> ast2600_fmc_data = {
>>   	.mode_bits     = SPI_RX_QUAD | SPI_RX_QUAD,
>>   	.we0	       = 16,
>>   	.ctl0	       = CE0_CTRL_REG,
>> +	.timing	       = CE0_TIMING_COMPENSATION_REG,
>> +	.hclk_mask     = 0xf0fff0ff,
>> +	.hdiv_max      = 2,
>> +	.calibrate     = aspeed_spi_ast2600_calibrate,
>>   	.segment_start = aspeed_spi_segment_ast2600_start,
>>   	.segment_end   = aspeed_spi_segment_ast2600_end,
>>   	.segment_reg   = aspeed_spi_segment_ast2600_reg,
>> @@ -870,6 +1147,10 @@ static const struct aspeed_spi_data
>> ast2600_spi_data = {
>>   	.mode_bits     = SPI_RX_QUAD | SPI_RX_QUAD,
>>   	.we0	       = 16,
>>   	.ctl0	       = CE0_CTRL_REG,
>> +	.timing	       = CE0_TIMING_COMPENSATION_REG,
>> +	.hclk_mask     = 0xf0fff0ff,
>> +	.hdiv_max      = 2,
>> +	.calibrate     = aspeed_spi_ast2600_calibrate,
>>   	.segment_start = aspeed_spi_segment_ast2600_start,
>>   	.segment_end   = aspeed_spi_segment_ast2600_end,
>>   	.segment_reg   = aspeed_spi_segment_ast2600_reg,
>> --
>> 2.34.1
> 
> 
> Best Wishes,
> Chin-Ting

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ