lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACPK8Xe8wc-z5ntfAfe96tMmq8eEvk0x60GG6jjc=F7BbU5xRQ@mail.gmail.com>
Date:   Wed, 2 Mar 2022 22:44:20 +0000
From:   Joel Stanley <joel@....id.au>
To:     Cédric Le Goater <clg@...d.org>
Cc:     linux-spi@...r.kernel.org,
        linux-mtd <linux-mtd@...ts.infradead.org>,
        Mark Brown <broonie@...nel.org>,
        Tudor Ambarus <tudor.ambarus@...rochip.com>,
        Pratyush Yadav <p.yadav@...com>,
        Miquel Raynal <miquel.raynal@...tlin.com>,
        Richard Weinberger <richard@....at>,
        Vignesh Raghavendra <vigneshr@...com>,
        linux-aspeed <linux-aspeed@...ts.ozlabs.org>,
        Andrew Jeffery <andrew@...id.au>,
        Chin-Ting Kuo <chin-ting_kuo@...eedtech.com>,
        devicetree <devicetree@...r.kernel.org>,
        Rob Herring <robh+dt@...nel.org>,
        Linux ARM <linux-arm-kernel@...ts.infradead.org>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v2 09/10] spi: aspeed: Calibrate read timings

On Wed, 2 Mar 2022 at 17:32, Cédric Le Goater <clg@...d.org> wrote:
>
> To accommodate the different response time of SPI transfers on different
> boards and different SPI NOR devices, the Aspeed controllers provide a
> set of Read Timing Compensation registers to tune the timing delays
> depending on the frequency being used. The AST2600 SoC has one of
> these registers per device. On the AST2500 and AST2400 SoCs, the
> timing register is shared by all devices which is a bit problematic to
> get good results other than for one device.
>
> The algorithm first reads a golden buffer at low speed and then performs
> reads with different clocks and delay cycle settings to find a breaking
> point. This selects a default good frequency for the CEx control register.
> The current settings are bit optimistic as we pick the first delay giving

typo: are a bit

> good results. A safer approach would be to determine an interval and
> choose the middle value.
>
> Calibration is performed when the direct mapping for reads is created.
> Since the underlying spi-nor object needs to be initialized to create
> the spi_mem operation for direct mapping, we should be fine. Having a
> specific API would clarify the requirements though.
>
> Cc: Pratyush Yadav <p.yadav@...com>
> Signed-off-by: Cédric Le Goater <clg@...d.org>

Reviewed-by: Joel Stanley <joel@....id.au>

> ---
>  drivers/spi/spi-aspeed-smc.c | 281 +++++++++++++++++++++++++++++++++++
>  1 file changed, 281 insertions(+)
>
> diff --git a/drivers/spi/spi-aspeed-smc.c b/drivers/spi/spi-aspeed-smc.c
> index 8c6d7f79d97f..dce25dfe6913 100644
> --- a/drivers/spi/spi-aspeed-smc.c
> +++ b/drivers/spi/spi-aspeed-smc.c
> @@ -35,6 +35,8 @@
>  #define   CTRL_IO_ADDRESS_4B           BIT(13) /* AST2400 SPI only */
>  #define   CTRL_IO_DUMMY_SET(dummy)                                     \
>         (((((dummy) >> 2) & 0x1) << 14) | (((dummy) & 0x3) << 6))
> +#define   CTRL_FREQ_SEL_SHIFT          8
> +#define   CTRL_FREQ_SEL_MASK           GENMASK(11, CTRL_FREQ_SEL_SHIFT)
>  #define   CTRL_CE_STOP_ACTIVE          BIT(2)
>  #define   CTRL_IO_MODE_CMD_MASK                GENMASK(1, 0)
>  #define   CTRL_IO_MODE_NORMAL          0x0
> @@ -47,6 +49,9 @@
>  /* CEx Address Decoding Range Register */
>  #define CE0_SEGMENT_ADDR_REG           0x30
>
> +/* CEx Read timing compensation register */
> +#define CE0_TIMING_COMPENSATION_REG    0x94
> +
>  enum aspeed_spi_ctl_reg_value {
>         ASPEED_SPI_BASE,
>         ASPEED_SPI_READ,
> @@ -72,10 +77,15 @@ struct aspeed_spi_data {
>         bool    hastype;
>         u32     mode_bits;
>         u32     we0;
> +       u32     timing;
> +       u32     hclk_mask;
> +       u32     hdiv_max;
>
>         u32 (*segment_start)(struct aspeed_spi *aspi, u32 reg);
>         u32 (*segment_end)(struct aspeed_spi *aspi, u32 reg);
>         u32 (*segment_reg)(struct aspeed_spi *aspi, u32 start, u32 end);
> +       int (*calibrate)(struct aspeed_spi_chip *chip, u32 hdiv,
> +                        const u8 *golden_buf, u8 *test_buf);
>  };
>
>  #define ASPEED_SPI_MAX_NUM_CS  5
> @@ -519,6 +529,8 @@ static int aspeed_spi_chip_adjust_window(struct aspeed_spi_chip *chip,
>         return 0;
>  }
>
> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip);
> +
>  static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
>  {
>         struct aspeed_spi *aspi = spi_controller_get_devdata(desc->mem->spi->master);
> @@ -567,6 +579,8 @@ static int aspeed_spi_dirmap_create(struct spi_mem_dirmap_desc *desc)
>         chip->ctl_val[ASPEED_SPI_READ] = ctl_val;
>         writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
>
> +       ret = aspeed_spi_do_calibration(chip);
> +
>         dev_info(aspi->dev, "CE%d read buswidth:%d [0x%08x]\n",
>                  chip->cs, op->data.buswidth, chip->ctl_val[ASPEED_SPI_READ]);
>
> @@ -814,6 +828,249 @@ static u32 aspeed_spi_segment_ast2600_reg(struct aspeed_spi *aspi,
>                 ((end - 1) & AST2600_SEG_ADDR_MASK);
>  }
>
> +/*
> + * Read timing compensation sequences
> + */
> +
> +#define CALIBRATE_BUF_SIZE SZ_16K
> +
> +static bool aspeed_spi_check_reads(struct aspeed_spi_chip *chip,
> +                                  const u8 *golden_buf, u8 *test_buf)
> +{
> +       int i;
> +
> +       for (i = 0; i < 10; i++) {
> +               memcpy_fromio(test_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
> +               if (memcmp(test_buf, golden_buf, CALIBRATE_BUF_SIZE) != 0) {
> +#if defined(VERBOSE_DEBUG)
> +                       print_hex_dump_bytes(DEVICE_NAME "  fail: ", DUMP_PREFIX_NONE,
> +                                            test_buf, 0x100);
> +#endif
> +                       return false;
> +               }
> +       }
> +       return true;
> +}
> +
> +#define FREAD_TPASS(i) (((i) / 2) | (((i) & 1) ? 0 : 8))
> +
> +/*
> + * The timing register is shared by all devices. Only update for CE0.
> + */
> +static int aspeed_spi_calibrate(struct aspeed_spi_chip *chip, u32 hdiv,
> +                               const u8 *golden_buf, u8 *test_buf)
> +{
> +       struct aspeed_spi *aspi = chip->aspi;
> +       const struct aspeed_spi_data *data = aspi->data;
> +       int i;
> +       int good_pass = -1, pass_count = 0;
> +       u32 shift = (hdiv - 1) << 2;
> +       u32 mask = ~(0xfu << shift);
> +       u32 fread_timing_val = 0;
> +
> +       /* Try HCLK delay 0..5, each one with/without delay and look for a
> +        * good pair.
> +        */
> +       for (i = 0; i < 12; i++) {
> +               bool pass;
> +
> +               if (chip->cs == 0) {
> +                       fread_timing_val &= mask;
> +                       fread_timing_val |= FREAD_TPASS(i) << shift;
> +                       writel(fread_timing_val, aspi->regs + data->timing);
> +               }
> +               pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
> +               dev_dbg(aspi->dev,
> +                       "  * [%08x] %d HCLK delay, %dns DI delay : %s",
> +                       fread_timing_val, i / 2, (i & 1) ? 0 : 4,
> +                       pass ? "PASS" : "FAIL");
> +               if (pass) {
> +                       pass_count++;
> +                       if (pass_count == 3) {
> +                               good_pass = i - 1;
> +                               break;
> +                       }
> +               } else {
> +                       pass_count = 0;
> +               }
> +       }
> +
> +       /* No good setting for this frequency */
> +       if (good_pass < 0)
> +               return -1;
> +
> +       /* We have at least one pass of margin, let's use first pass */
> +       if (chip->cs == 0) {
> +               fread_timing_val &= mask;
> +               fread_timing_val |= FREAD_TPASS(good_pass) << shift;
> +               writel(fread_timing_val, aspi->regs + data->timing);
> +       }
> +       dev_dbg(aspi->dev, " * -> good is pass %d [0x%08x]",
> +               good_pass, fread_timing_val);
> +       return 0;
> +}
> +
> +static bool aspeed_spi_check_calib_data(const u8 *test_buf, u32 size)
> +{
> +       const u32 *tb32 = (const u32 *)test_buf;
> +       u32 i, cnt = 0;
> +
> +       /* We check if we have enough words that are neither all 0
> +        * nor all 1's so the calibration can be considered valid.
> +        *
> +        * I use an arbitrary threshold for now of 64
> +        */
> +       size >>= 2;
> +       for (i = 0; i < size; i++) {
> +               if (tb32[i] != 0 && tb32[i] != 0xffffffff)
> +                       cnt++;
> +       }
> +       return cnt >= 64;
> +}
> +
> +static const u32 aspeed_spi_hclk_divs[] = {
> +       0xf, /* HCLK */
> +       0x7, /* HCLK/2 */
> +       0xe, /* HCLK/3 */
> +       0x6, /* HCLK/4 */
> +       0xd, /* HCLK/5 */
> +};
> +
> +#define ASPEED_SPI_HCLK_DIV(i) \
> +       (aspeed_spi_hclk_divs[(i) - 1] << CTRL_FREQ_SEL_SHIFT)
> +
> +static int aspeed_spi_do_calibration(struct aspeed_spi_chip *chip)
> +{
> +       struct aspeed_spi *aspi = chip->aspi;
> +       const struct aspeed_spi_data *data = aspi->data;
> +       u32 ahb_freq = aspi->clk_freq;
> +       u32 max_freq = chip->clk_freq;
> +       u32 ctl_val;
> +       u8 *golden_buf = NULL;
> +       u8 *test_buf = NULL;
> +       int i, rc, best_div = -1;
> +
> +       dev_dbg(aspi->dev, "calculate timing compensation - AHB freq: %d MHz",
> +               ahb_freq / 1000000);
> +
> +       /*
> +        * use the related low frequency to get check calibration data
> +        * and get golden data.
> +        */
> +       ctl_val = chip->ctl_val[ASPEED_SPI_READ] & data->hclk_mask;
> +       writel(ctl_val, chip->ctl);
> +
> +       test_buf = kzalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL);
> +       if (!test_buf)
> +               return -ENOMEM;
> +
> +       golden_buf = test_buf + CALIBRATE_BUF_SIZE;
> +
> +       memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
> +       if (!aspeed_spi_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) {
> +               dev_info(aspi->dev, "Calibration area too uniform, using low speed");
> +               goto no_calib;
> +       }
> +
> +#if defined(VERBOSE_DEBUG)
> +       print_hex_dump_bytes(DEVICE_NAME "  good: ", DUMP_PREFIX_NONE,
> +                            golden_buf, 0x100);
> +#endif
> +
> +       /* Now we iterate the HCLK dividers until we find our breaking point */
> +       for (i = ARRAY_SIZE(aspeed_spi_hclk_divs); i > data->hdiv_max - 1; i--) {
> +               u32 tv, freq;
> +
> +               freq = ahb_freq / i;
> +               if (freq > max_freq)
> +                       continue;
> +
> +               /* Set the timing */
> +               tv = chip->ctl_val[ASPEED_SPI_READ] | ASPEED_SPI_HCLK_DIV(i);
> +               writel(tv, chip->ctl);
> +               dev_dbg(aspi->dev, "Trying HCLK/%d [%08x] ...", i, tv);
> +               rc = data->calibrate(chip, i, golden_buf, test_buf);
> +               if (rc == 0)
> +                       best_div = i;
> +       }
> +
> +       /* Nothing found ? */
> +       if (best_div < 0) {
> +               dev_warn(aspi->dev, "No good frequency, using dumb slow");
> +       } else {
> +               dev_dbg(aspi->dev, "Found good read timings at HCLK/%d", best_div);
> +
> +               /* Record the freq */
> +               for (i = 0; i < ASPEED_SPI_MAX; i++)
> +                       chip->ctl_val[i] = (chip->ctl_val[i] & data->hclk_mask) |
> +                               ASPEED_SPI_HCLK_DIV(best_div);
> +       }
> +
> +no_calib:
> +       writel(chip->ctl_val[ASPEED_SPI_READ], chip->ctl);
> +       kfree(test_buf);
> +       return 0;
> +}
> +
> +#define TIMING_DELAY_DI                BIT(3)
> +#define TIMING_DELAY_HCYCLE_MAX        5
> +#define TIMING_REG_AST2600(chip)                               \
> +       ((chip)->aspi->regs + (chip)->aspi->data->timing +      \
> +        (chip)->cs * 4)
> +
> +static int aspeed_spi_ast2600_calibrate(struct aspeed_spi_chip *chip, u32 hdiv,
> +                                       const u8 *golden_buf, u8 *test_buf)
> +{
> +       struct aspeed_spi *aspi = chip->aspi;
> +       int hcycle;
> +       u32 shift = (hdiv - 2) << 3;
> +       u32 mask = ~(0xfu << shift);
> +       u32 fread_timing_val = 0;
> +
> +       for (hcycle = 0; hcycle <= TIMING_DELAY_HCYCLE_MAX; hcycle++) {
> +               int delay_ns;
> +               bool pass = false;
> +
> +               fread_timing_val &= mask;
> +               fread_timing_val |= hcycle << shift;
> +
> +               /* no DI input delay first  */
> +               writel(fread_timing_val, TIMING_REG_AST2600(chip));
> +               pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
> +               dev_dbg(aspi->dev,
> +                       "  * [%08x] %d HCLK delay, DI delay none : %s",
> +                       fread_timing_val, hcycle, pass ? "PASS" : "FAIL");
> +               if (pass)
> +                       return 0;
> +
> +               /* Add DI input delays  */
> +               fread_timing_val &= mask;
> +               fread_timing_val |= (TIMING_DELAY_DI | hcycle) << shift;
> +
> +               for (delay_ns = 0; delay_ns < 0x10; delay_ns++) {
> +                       fread_timing_val &= ~(0xf << (4 + shift));
> +                       fread_timing_val |= delay_ns << (4 + shift);
> +
> +                       writel(fread_timing_val, TIMING_REG_AST2600(chip));
> +                       pass = aspeed_spi_check_reads(chip, golden_buf, test_buf);
> +                       dev_dbg(aspi->dev,
> +                               "  * [%08x] %d HCLK delay, DI delay %d.%dns : %s",
> +                               fread_timing_val, hcycle, (delay_ns + 1) / 2,
> +                               (delay_ns + 1) & 1 ? 5 : 5, pass ? "PASS" : "FAIL");
> +                       /*
> +                        * TODO: This is optimistic. We should look
> +                        * for a working interval and save the middle
> +                        * value in the read timing register.
> +                        */
> +                       if (pass)
> +                               return 0;
> +               }
> +       }
> +
> +       /* No good setting for this frequency */
> +       return -1;
> +}
> +
>  /*
>   * Platform definitions
>   */
> @@ -822,6 +1079,10 @@ static const struct aspeed_spi_data ast2400_fmc_data = {
>         .hastype       = true,
>         .we0           = 16,
>         .ctl0          = CE0_CTRL_REG,
> +       .timing        = CE0_TIMING_COMPENSATION_REG,
> +       .hclk_mask     = 0xfffff0ff,
> +       .hdiv_max      = 1,
> +       .calibrate     = aspeed_spi_calibrate,
>         .segment_start = aspeed_spi_segment_start,
>         .segment_end   = aspeed_spi_segment_end,
>         .segment_reg   = aspeed_spi_segment_reg,
> @@ -832,6 +1093,10 @@ static const struct aspeed_spi_data ast2400_spi_data = {
>         .hastype       = false,
>         .we0           = 0,
>         .ctl0          = 0x04,
> +       .timing        = 0x14,
> +       .hclk_mask     = 0xfffff0ff,
> +       .hdiv_max      = 1,
> +       .calibrate     = aspeed_spi_calibrate,
>         /* No segment registers */
>  };
>
> @@ -840,6 +1105,10 @@ static const struct aspeed_spi_data ast2500_fmc_data = {
>         .hastype       = true,
>         .we0           = 16,
>         .ctl0          = CE0_CTRL_REG,
> +       .timing        = CE0_TIMING_COMPENSATION_REG,
> +       .hclk_mask     = 0xfffff0ff,
> +       .hdiv_max      = 1,
> +       .calibrate     = aspeed_spi_calibrate,
>         .segment_start = aspeed_spi_segment_start,
>         .segment_end   = aspeed_spi_segment_end,
>         .segment_reg   = aspeed_spi_segment_reg,
> @@ -850,6 +1119,10 @@ static const struct aspeed_spi_data ast2500_spi_data = {
>         .hastype       = false,
>         .we0           = 16,
>         .ctl0          = CE0_CTRL_REG,
> +       .timing        = CE0_TIMING_COMPENSATION_REG,
> +       .hclk_mask     = 0xfffff0ff,
> +       .hdiv_max      = 1,
> +       .calibrate     = aspeed_spi_calibrate,
>         .segment_start = aspeed_spi_segment_start,
>         .segment_end   = aspeed_spi_segment_end,
>         .segment_reg   = aspeed_spi_segment_reg,
> @@ -861,6 +1134,10 @@ static const struct aspeed_spi_data ast2600_fmc_data = {
>         .mode_bits     = SPI_RX_QUAD | SPI_RX_QUAD,
>         .we0           = 16,
>         .ctl0          = CE0_CTRL_REG,
> +       .timing        = CE0_TIMING_COMPENSATION_REG,
> +       .hclk_mask     = 0xf0fff0ff,
> +       .hdiv_max      = 2,
> +       .calibrate     = aspeed_spi_ast2600_calibrate,
>         .segment_start = aspeed_spi_segment_ast2600_start,
>         .segment_end   = aspeed_spi_segment_ast2600_end,
>         .segment_reg   = aspeed_spi_segment_ast2600_reg,
> @@ -872,6 +1149,10 @@ static const struct aspeed_spi_data ast2600_spi_data = {
>         .mode_bits     = SPI_RX_QUAD | SPI_RX_QUAD,
>         .we0           = 16,
>         .ctl0          = CE0_CTRL_REG,
> +       .timing        = CE0_TIMING_COMPENSATION_REG,
> +       .hclk_mask     = 0xf0fff0ff,
> +       .hdiv_max      = 2,
> +       .calibrate     = aspeed_spi_ast2600_calibrate,
>         .segment_start = aspeed_spi_segment_ast2600_start,
>         .segment_end   = aspeed_spi_segment_ast2600_end,
>         .segment_reg   = aspeed_spi_segment_ast2600_reg,
> --
> 2.34.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ