[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0ec477b5-e404-536f-ff60-39f43208c3cc@kaod.org>
Date: Wed, 30 Mar 2022 14:14:40 +0200
From: Cédric Le Goater <clg@...d.org>
To: Chin-Ting Kuo <chin-ting_kuo@...eedtech.com>,
"linux-spi@...r.kernel.org" <linux-spi@...r.kernel.org>,
"linux-mtd@...ts.infradead.org" <linux-mtd@...ts.infradead.org>
CC: Mark Brown <broonie@...nel.org>,
Tudor Ambarus <tudor.ambarus@...rochip.com>,
Pratyush Yadav <p.yadav@...com>,
Miquel Raynal <miquel.raynal@...tlin.com>,
Richard Weinberger <richard@....at>,
Vignesh Raghavendra <vigneshr@...com>,
"linux-aspeed@...ts.ozlabs.org" <linux-aspeed@...ts.ozlabs.org>,
Joel Stanley <joel@....id.au>,
Andrew Jeffery <andrew@...id.au>,
"devicetree@...r.kernel.org" <devicetree@...r.kernel.org>,
Rob Herring <robh+dt@...nel.org>,
"linux-arm-kernel@...ts.infradead.org"
<linux-arm-kernel@...ts.infradead.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Tao Ren <rentao.bupt@...il.com>
Subject: Re: [PATCH v4 08/11] spi: aspeed: Calibrate read timings
On 3/30/22 13:53, Chin-Ting Kuo wrote:
> Hi Cédric Le,
>
>> -----Original Message-----
>> From: Cédric Le Goater <clg@...d.org>
>> Sent: Friday, March 25, 2022 6:09 PM
>> To: linux-spi@...r.kernel.org; linux-mtd@...ts.infradead.org
>> Subject: [PATCH v4 08/11] spi: aspeed: Calibrate read timings
>>
>> To accommodate the different response time of SPI transfers on different
>> boards and different SPI NOR devices, the Aspeed controllers provide a set of
>> Read Timing Compensation registers to tune the timing delays depending on
>> the frequency being used. The AST2600 SoC has one of these registers per
>> device. On the AST2500 and AST2400 SoCs, the timing register is shared by all
>> devices which is problematic to get good results other than for one device.
>>
>> The algorithm first reads a golden buffer at low speed and then performs reads
>> with different clocks and delay cycle settings to find a breaking point. This
>> selects a default good frequency for the CEx control register.
>> The current settings are a bit optimistic as we pick the first delay giving good
>> results. A safer approach would be to determine an interval and choose the
>> middle value.
>>
>> Calibration is performed when the direct mapping for reads is created.
>> Since the underlying spi-nor object needs to be initialized to create the
>> spi_mem operation for direct mapping, we should be fine. Having a specific
>> API would clarify the requirements though.
>>
>> Cc: Pratyush Yadav <p.yadav@...com>
>> Reviewed-by: Joel Stanley <joel@....id.au>
>> Tested-by: Joel Stanley <joel@....id.au>
>> Tested-by: Tao Ren <rentao.bupt@...il.com>
>> Signed-off-by: Cédric Le Goater <clg@...d.org>
>> ---
>> drivers/spi/spi-aspeed-smc.c | 281
>> +++++++++++++++++++++++++++++++++++
>> 1 file changed, 281 insertions(+)
>>
>> diff --git a/drivers/spi/spi-aspeed-smc.c b/drivers/spi/spi-aspeed-smc.c index
>> 7f306da7c44e..660451667a39 100644
>> --- a/drivers/spi/spi-aspeed-smc.c
>> +++ b/drivers/spi/spi-aspeed-smc.c
>> @@ -33,6 +33,8 @@
>> #define CTRL_IO_ADDRESS_4B BIT(13) /* AST2400 SPI only */
>> #define CTRL_IO_DUMMY_SET(dummy) \
>> (((((dummy) >> 2) & 0x1) << 14) | (((dummy) & 0x3) << 6))
>> +#define CTRL_FREQ_SEL_SHIFT 8
>> +#define CTRL_FREQ_SEL_MASK GENMASK(11,
>> CTRL_FREQ_SEL_SHIFT)
>> #define CTRL_CE_STOP_ACTIVE BIT(2)
>> #define CTRL_IO_MODE_CMD_MASK GENMASK(1, 0)
>> #define CTRL_IO_MODE_NORMAL 0x0
>> @@ -45,6 +47,9 @@
>> /* CEx Address Decoding Range Register */
>> #define CE0_SEGMENT_ADDR_REG 0x30
>>
>> +/* CEx Read timing compensation register */
>> +#define CE0_TIMING_COMPENSATION_REG 0x94
>> +
>> enum aspeed_spi_ctl_reg_value {
>> ASPEED_SPI_BASE,
>> ASPEED_SPI_READ,
>> @@ -70,10 +75,15 @@ struct aspeed_spi_data {
>> bool hastype;
>> u32 mode_bits;
>> u32 we0;
>> + u32 timing;
>> + u32 hclk_mask;
>> + u32 hdiv_max;
>>
>> u32 (*segment_start)(struct aspeed_spi *aspi, u32 reg);
>> u32 (*segment_end)(struct aspeed_spi *aspi, u32 reg);
>> u32 (*segment_reg)(struct aspeed_spi *aspi, u32 start, u32 end);
>> + int (*calibrate)(struct aspeed_spi_chip *chip, u32 hdiv,
>> + const u8 *golden_buf, u8 *test_buf);
>> };
>>
>> #define ASPEED_SPI_MAX_NUM_CS 5
>> @@ -517,6 +527,8 @@ static int aspeed_spi_chip_adjust_window(struct
>> aspeed_spi_chip *chip,
>> return 0;
>> }
>>
>> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip);
>> +
>> static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc) {
>> struct aspeed_spi *aspi =
>> spi_controller_get_devdata(desc->mem->spi->master);
>> @@ -565,6 +577,8 @@ static int aspeed_spi_dirmap_create(struct
>> spi_mem_dirmap_desc *desc)
>> chip->ctl_val[ASPEED_SPI_READ] = ctl_val;
>> writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
>>
>> + ret = aspeed_spi_do_calibration(chip);
>> +
>> dev_info(aspi->dev, "CE%d read buswidth:%d [0x%08x]\n",
>> chip->cs, op->data.buswidth, chip->ctl_val[ASPEED_SPI_READ]);
>>
>> @@ -812,6 +826,249 @@ static u32 aspeed_spi_segment_ast2600_reg(struct
>> aspeed_spi *aspi,
>> ((end - 1) & AST2600_SEG_ADDR_MASK);
>> }
>>
>> +/*
>> + * Read timing compensation sequences
>> + */
>> +
>> +#define CALIBRATE_BUF_SIZE SZ_16K
>> +
>> +static bool aspeed_spi_check_reads(struct aspeed_spi_chip *chip,
>> + const u8 *golden_buf, u8 *test_buf) {
>> + int i;
>> +
>> + for (i = 0; i < 10; i++) {
>> + memcpy_fromio(test_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
>> + if (memcmp(test_buf, golden_buf, CALIBRATE_BUF_SIZE) != 0) { #if
>> +defined(VERBOSE_DEBUG)
>> + print_hex_dump_bytes(DEVICE_NAME " fail: ",
>> DUMP_PREFIX_NONE,
>> + test_buf, 0x100);
>> +#endif
>> + return false;
>> + }
>> + }
>> + return true;
>> +}
>> +
>> +#define FREAD_TPASS(i) (((i) / 2) | (((i) & 1) ? 0 : 8))
>> +
>> +/*
>> + * The timing register is shared by all devices. Only update for CE0.
>> + */
>> +static int aspeed_spi_calibrate(struct aspeed_spi_chip *chip, u32 hdiv,
>> + const u8 *golden_buf, u8 *test_buf) {
>> + struct aspeed_spi *aspi = chip->aspi;
>> + const struct aspeed_spi_data *data = aspi->data;
>> + int i;
>> + int good_pass = -1, pass_count = 0;
>> + u32 shift = (hdiv - 1) << 2;
>> + u32 mask = ~(0xfu << shift);
>> + u32 fread_timing_val = 0;
>> +
>> + /* Try HCLK delay 0..5, each one with/without delay and look for a
>> + * good pair.
>> + */
>> + for (i = 0; i < 12; i++) {
>> + bool pass;
>> +
>> + if (chip->cs == 0) {
>> + fread_timing_val &= mask;
>> + fread_timing_val |= FREAD_TPASS(i) << shift;
>> + writel(fread_timing_val, aspi->regs + data->timing);
>> + }
>> + pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
>> + dev_dbg(aspi->dev,
>> + " * [%08x] %d HCLK delay, %dns DI delay : %s",
>> + fread_timing_val, i / 2, (i & 1) ? 0 : 4,
>> + pass ? "PASS" : "FAIL");
>> + if (pass) {
>> + pass_count++;
>> + if (pass_count == 3) {
>> + good_pass = i - 1;
>> + break;
>> + }
>> + } else {
>> + pass_count = 0;
>> + }
>> + }
>> +
>> + /* No good setting for this frequency */
>> + if (good_pass < 0)
>> + return -1;
>> +
>> + /* We have at least one pass of margin, let's use first pass */
>> + if (chip->cs == 0) {
>> + fread_timing_val &= mask;
>> + fread_timing_val |= FREAD_TPASS(good_pass) << shift;
>> + writel(fread_timing_val, aspi->regs + data->timing);
>> + }
>> + dev_dbg(aspi->dev, " * -> good is pass %d [0x%08x]",
>> + good_pass, fread_timing_val);
>> + return 0;
>> +}
>> +
>> +static bool aspeed_spi_check_calib_data(const u8 *test_buf, u32 size) {
>> + const u32 *tb32 = (const u32 *)test_buf;
>> + u32 i, cnt = 0;
>> +
>> + /* We check if we have enough words that are neither all 0
>> + * nor all 1's so the calibration can be considered valid.
>> + *
>> + * I use an arbitrary threshold for now of 64
>> + */
>> + size >>= 2;
>> + for (i = 0; i < size; i++) {
>> + if (tb32[i] != 0 && tb32[i] != 0xffffffff)
>> + cnt++;
>> + }
>> + return cnt >= 64;
>> +}
>> +
>> +static const u32 aspeed_spi_hclk_divs[] = {
>> + 0xf, /* HCLK */
>> + 0x7, /* HCLK/2 */
>> + 0xe, /* HCLK/3 */
>> + 0x6, /* HCLK/4 */
>> + 0xd, /* HCLK/5 */
>> +};
>> +
>> +#define ASPEED_SPI_HCLK_DIV(i) \
>> + (aspeed_spi_hclk_divs[(i) - 1] << CTRL_FREQ_SEL_SHIFT)
>> +
>> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip) {
>> + struct aspeed_spi *aspi = chip->aspi;
>> + const struct aspeed_spi_data *data = aspi->data;
>> + u32 ahb_freq = aspi->clk_freq;
>> + u32 max_freq = chip->clk_freq;
>> + u32 ctl_val;
>> + u8 *golden_buf = NULL;
>> + u8 *test_buf = NULL;
>> + int i, rc, best_div = -1;
>> +
>> + dev_dbg(aspi->dev, "calculate timing compensation - AHB freq: %d MHz",
>> + ahb_freq / 1000000);
>> +
>> + /*
>> + * use the related low frequency to get check calibration data
>> + * and get golden data.
>> + */
>> + ctl_val = chip->ctl_val[ASPEED_SPI_READ] & data->hclk_mask;
>> + writel(ctl_val, chip->ctl);
>> +
>> + test_buf = kzalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL);
>> + if (!test_buf)
>> + return -ENOMEM;
>> +
>> + golden_buf = test_buf + CALIBRATE_BUF_SIZE;
>> +
>> + memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
>> + if (!aspeed_spi_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) {
>> + dev_info(aspi->dev, "Calibration area too uniform, using low speed");
>> + goto no_calib;
>> + }
>> +
>> +#if defined(VERBOSE_DEBUG)
>> + print_hex_dump_bytes(DEVICE_NAME " good: ", DUMP_PREFIX_NONE,
>> + golden_buf, 0x100);
>> +#endif
>> +
>> + /* Now we iterate the HCLK dividers until we find our breaking point */
>> + for (i = ARRAY_SIZE(aspeed_spi_hclk_divs); i > data->hdiv_max - 1; i--) {
>> + u32 tv, freq;
>> +
>> + freq = ahb_freq / i;
>> + if (freq > max_freq)
>> + continue;
>> +
>> + /* Set the timing */
>> + tv = chip->ctl_val[ASPEED_SPI_READ] | ASPEED_SPI_HCLK_DIV(i);
>> + writel(tv, chip->ctl);
>> + dev_dbg(aspi->dev, "Trying HCLK/%d [%08x] ...", i, tv);
>> + rc = data->calibrate(chip, i, golden_buf, test_buf);
>> + if (rc == 0)
>> + best_div = i;
>> + }
>> +
>> + /* Nothing found ? */
>> + if (best_div < 0) {
>> + dev_warn(aspi->dev, "No good frequency, using dumb slow");
>> + } else {
>> + dev_dbg(aspi->dev, "Found good read timings at HCLK/%d",
>> best_div);
>> +
>> + /* Record the freq */
>> + for (i = 0; i < ASPEED_SPI_MAX; i++)
>> + chip->ctl_val[i] = (chip->ctl_val[i] & data->hclk_mask) |
>> + ASPEED_SPI_HCLK_DIV(best_div);
>> + }
>> +
>> +no_calib:
>
> - Maybe, if the calibration process is not executed, the frequency setting calculated from max_frequency in the device tree can be filled in FMC10 instead of using dumb slow one, 12.5MHz, always.
Indeed.
> For example, except for uniform content case, the calibration process will be ignored when SPI clock frequency in the device tree is smaller than 40MHz.
> - The function, aspeed_2600_spi_clk_basic_setting, in [2] can be added to support lower SPI clock frequency, e.g., 4MHz.
> For AST2600, SPI clock frequency can be calculated by HCLK/(FMC10[27:24] + FMC10[11:8]).
Could you please send patches on top of this series ? Here are the branches :
https://github.com/legoater/linux/commits/openbmc-5.15
https://github.com/legoater/linux/commits/aspeed (mainline)
I can include them when I resend a v5.
Thanks,
C.
>
>> + writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
>> + kfree(test_buf);
>> + return 0;
>> +}
>> +
>> +#define TIMING_DELAY_DI BIT(3)
>> +#define TIMING_DELAY_HCYCLE_MAX 5
>> +#define TIMING_REG_AST2600(chip) \
>> + ((chip)->aspi->regs + (chip)->aspi->data->timing + \
>> + (chip)->cs * 4)
>> +
>> +static int aspeed_spi_ast2600_calibrate(struct aspeed_spi_chip *chip, u32
>> hdiv,
>> + const u8 *golden_buf, u8 *test_buf) {
>> + struct aspeed_spi *aspi = chip->aspi;
>> + int hcycle;
>> + u32 shift = (hdiv - 2) << 3;
>> + u32 mask = ~(0xfu << shift);
>> + u32 fread_timing_val = 0;
>> +
>> + for (hcycle = 0; hcycle <= TIMING_DELAY_HCYCLE_MAX; hcycle++) {
>> + int delay_ns;
>> + bool pass = false;
>> +
>> + fread_timing_val &= mask;
>> + fread_timing_val |= hcycle << shift;
>> +
>> + /* no DI input delay first */
>> + writel(fread_timing_val, TIMING_REG_AST2600(chip));
>> + pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
>> + dev_dbg(aspi->dev,
>> + " * [%08x] %d HCLK delay, DI delay none : %s",
>> + fread_timing_val, hcycle, pass ? "PASS" : "FAIL");
>> + if (pass)
>> + return 0;
>> +
>> + /* Add DI input delays */
>> + fread_timing_val &= mask;
>> + fread_timing_val |= (TIMING_DELAY_DI | hcycle) << shift;
>> +
>> + for (delay_ns = 0; delay_ns < 0x10; delay_ns++) {
>> + fread_timing_val &= ~(0xf << (4 + shift));
>> + fread_timing_val |= delay_ns << (4 + shift);
>> +
>> + writel(fread_timing_val, TIMING_REG_AST2600(chip));
>> + pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
>> + dev_dbg(aspi->dev,
>> + " * [%08x] %d HCLK delay, DI delay %d.%dns : %s",
>> + fread_timing_val, hcycle, (delay_ns + 1) / 2,
>> + (delay_ns + 1) & 1 ? 5 : 5, pass ? "PASS" : "FAIL");
>> + /*
>> + * TODO: This is optimistic. We should look
>> + * for a working interval and save the middle
>> + * value in the read timing register.
>> + */
>> + if (pass)
>> + return 0;
>> + }
>> + }
>> +
>> + /* No good setting for this frequency */
>> + return -1;
>> +}
>> +
>> /*
>> * Platform definitions
>> */
>> @@ -820,6 +1077,10 @@ static const struct aspeed_spi_data
>> ast2400_fmc_data = {
>> .hastype = true,
>> .we0 = 16,
>> .ctl0 = CE0_CTRL_REG,
>> + .timing = CE0_TIMING_COMPENSATION_REG,
>> + .hclk_mask = 0xfffff0ff,
>> + .hdiv_max = 1,
>> + .calibrate = aspeed_spi_calibrate,
>> .segment_start = aspeed_spi_segment_start,
>> .segment_end = aspeed_spi_segment_end,
>> .segment_reg = aspeed_spi_segment_reg,
>> @@ -830,6 +1091,10 @@ static const struct aspeed_spi_data
>> ast2400_spi_data = {
>> .hastype = false,
>> .we0 = 0,
>> .ctl0 = 0x04,
>> + .timing = 0x14,
>> + .hclk_mask = 0xfffff0ff,
>> + .hdiv_max = 1,
>> + .calibrate = aspeed_spi_calibrate,
>> /* No segment registers */
>> };
>>
>> @@ -838,6 +1103,10 @@ static const struct aspeed_spi_data
>> ast2500_fmc_data = {
>> .hastype = true,
>> .we0 = 16,
>> .ctl0 = CE0_CTRL_REG,
>> + .timing = CE0_TIMING_COMPENSATION_REG,
>> + .hclk_mask = 0xfffff0ff,
>> + .hdiv_max = 1,
>> + .calibrate = aspeed_spi_calibrate,
>> .segment_start = aspeed_spi_segment_start,
>> .segment_end = aspeed_spi_segment_end,
>> .segment_reg = aspeed_spi_segment_reg,
>> @@ -848,6 +1117,10 @@ static const struct aspeed_spi_data
>> ast2500_spi_data = {
>> .hastype = false,
>> .we0 = 16,
>> .ctl0 = CE0_CTRL_REG,
>> + .timing = CE0_TIMING_COMPENSATION_REG,
>> + .hclk_mask = 0xfffff0ff,
>> + .hdiv_max = 1,
>> + .calibrate = aspeed_spi_calibrate,
>> .segment_start = aspeed_spi_segment_start,
>> .segment_end = aspeed_spi_segment_end,
>> .segment_reg = aspeed_spi_segment_reg,
>> @@ -859,6 +1132,10 @@ static const struct aspeed_spi_data
>> ast2600_fmc_data = {
>> .mode_bits = SPI_RX_QUAD | SPI_RX_QUAD,
>> .we0 = 16,
>> .ctl0 = CE0_CTRL_REG,
>> + .timing = CE0_TIMING_COMPENSATION_REG,
>> + .hclk_mask = 0xf0fff0ff,
>> + .hdiv_max = 2,
>> + .calibrate = aspeed_spi_ast2600_calibrate,
>> .segment_start = aspeed_spi_segment_ast2600_start,
>> .segment_end = aspeed_spi_segment_ast2600_end,
>> .segment_reg = aspeed_spi_segment_ast2600_reg,
>> @@ -870,6 +1147,10 @@ static const struct aspeed_spi_data
>> ast2600_spi_data = {
>> .mode_bits = SPI_RX_QUAD | SPI_RX_QUAD,
>> .we0 = 16,
>> .ctl0 = CE0_CTRL_REG,
>> + .timing = CE0_TIMING_COMPENSATION_REG,
>> + .hclk_mask = 0xf0fff0ff,
>> + .hdiv_max = 2,
>> + .calibrate = aspeed_spi_ast2600_calibrate,
>> .segment_start = aspeed_spi_segment_ast2600_start,
>> .segment_end = aspeed_spi_segment_ast2600_end,
>> .segment_reg = aspeed_spi_segment_ast2600_reg,
>> --
>> 2.34.1
>
>
> Best Wishes,
> Chin-Ting
Powered by blists - more mailing lists