[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <57b4f49d-ae93-89d4-20ed-43fdd580e0d3@foss.st.com>
Date: Fri, 25 Mar 2022 15:01:45 +0100
From: Yann Gautier <yann.gautier@...s.st.com>
To: Ulf Hansson <ulf.hansson@...aro.org>
CC: Christophe Kerello <christophe.kerello@...s.st.com>,
Ludovic Barre <ludovic.barre@...s.st.com>,
Maxime Coquelin <mcoquelin.stm32@...il.com>,
Alexandre Torgue <alexandre.torgue@...s.st.com>,
Philipp Zabel <p.zabel@...gutronix.de>,
Linus Walleij <linus.walleij@...aro.org>,
<linux-mmc@...r.kernel.org>,
<linux-stm32@...md-mailman.stormreply.com>,
<linux-arm-kernel@...ts.infradead.org>,
<linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 2/2] mmc: mmci: stm32: use a buffer for unaligned DMA
requests
On 3/25/22 14:43, Ulf Hansson wrote:
> On Thu, 24 Mar 2022 at 17:23, Yann Gautier <yann.gautier@...s.st.com> wrote:
>>
>> On 3/24/22 12:55, Ulf Hansson wrote:
>>> On Thu, 17 Mar 2022 at 12:19, Yann Gautier <yann.gautier@...s.st.com> wrote:
>>>>
>>>> In SDIO mode, the sg list for requests can be unaligned with what the
>>>> STM32 SDMMC internal DMA can support. In that case, instead of failing,
>>>> use a temporary bounce buffer to copy from/to the sg list.
>>>> This buffer is limited to 1MB. But for that we need to also limit
>>>> max_req_size to 1MB. It has not shown any throughput penalties for
>>>> SD-cards or eMMC.
>>>>
>>>> Signed-off-by: Yann Gautier <yann.gautier@...s.st.com>
>>>> ---
>>>> drivers/mmc/host/mmci_stm32_sdmmc.c | 80 +++++++++++++++++++++++------
>>>> 1 file changed, 63 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
>>>> index 4566d7fc9055..a4414e32800f 100644
>>>> --- a/drivers/mmc/host/mmci_stm32_sdmmc.c
>>>> +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
>>>> @@ -43,6 +43,9 @@ struct sdmmc_lli_desc {
>>>> struct sdmmc_idma {
>>>> dma_addr_t sg_dma;
>>>> void *sg_cpu;
>>>> + dma_addr_t bounce_dma_addr;
>>>> + void *bounce_buf;
>>>> + bool use_bounce_buffer;
>>>> };
>>>>
>>>> struct sdmmc_dlyb {
>>>> @@ -54,6 +57,7 @@ struct sdmmc_dlyb {
>>>> static int sdmmc_idma_validate_data(struct mmci_host *host,
>>>> struct mmc_data *data)
>>>> {
>>>> + struct sdmmc_idma *idma = host->dma_priv;
>>>> struct scatterlist *sg;
>>>> int i;
>>>>
>>>> @@ -61,21 +65,23 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
>>>> * idma has constraints on idmabase & idmasize for each element
>>>> * excepted the last element which has no constraint on idmasize
>>>> */
>>>> + idma->use_bounce_buffer = false;
>>>> for_each_sg(data->sg, sg, data->sg_len - 1, i) {
>>>> if (!IS_ALIGNED(sg->offset, sizeof(u32)) ||
>>>> !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) {
>>>> - dev_err(mmc_dev(host->mmc),
>>>> + dev_dbg(mmc_dev(host->mmc),
>>>> "unaligned scatterlist: ofst:%x length:%d\n",
>>>> data->sg->offset, data->sg->length);
>>>> - return -EINVAL;
>>>> + idma->use_bounce_buffer = true;
>>>> + return 0;
>>>> }
>>>> }
>>>>
>>>> if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
>>>> - dev_err(mmc_dev(host->mmc),
>>>> + dev_dbg(mmc_dev(host->mmc),
>>>> "unaligned last scatterlist: ofst:%x length:%d\n",
>>>> data->sg->offset, data->sg->length);
>>>> - return -EINVAL;
>>>> + idma->use_bounce_buffer = true;
>>>> }
>>>>
>>>> return 0;
>>>> @@ -84,18 +90,29 @@ static int sdmmc_idma_validate_data(struct mmci_host *host,
>>>> static int _sdmmc_idma_prep_data(struct mmci_host *host,
>>>> struct mmc_data *data)
>>>> {
>>>> - int n_elem;
>>>> + struct sdmmc_idma *idma = host->dma_priv;
>>>>
>>>> - n_elem = dma_map_sg(mmc_dev(host->mmc),
>>>> - data->sg,
>>>> - data->sg_len,
>>>> - mmc_get_dma_dir(data));
>>>> + if (idma->use_bounce_buffer) {
>>>> + if (data->flags & MMC_DATA_WRITE) {
>>>> + unsigned int xfer_bytes = data->blksz * data->blocks;
>>>>
>>>> - if (!n_elem) {
>>>> - dev_err(mmc_dev(host->mmc), "dma_map_sg failed\n");
>>>> - return -EINVAL;
>>>> - }
>>>> + sg_copy_to_buffer(data->sg, data->sg_len,
>>>> + idma->bounce_buf, xfer_bytes);
>>>> + dma_wmb();
>>>> + }
>>>> + } else {
>>>> + int n_elem;
>>>> +
>>>> + n_elem = dma_map_sg(mmc_dev(host->mmc),
>>>> + data->sg,
>>>> + data->sg_len,
>>>> + mmc_get_dma_dir(data));
>>>>
>>>> + if (!n_elem) {
>>>> + dev_err(mmc_dev(host->mmc), "dma_map_sg failed\n");
>>>> + return -EINVAL;
>>>> + }
>>>> + }
>>>> return 0;
>>>> }
>>>>
>>>> @@ -112,8 +129,19 @@ static int sdmmc_idma_prep_data(struct mmci_host *host,
>>>> static void sdmmc_idma_unprep_data(struct mmci_host *host,
>>>> struct mmc_data *data, int err)
>>>> {
>>>> - dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
>>>> - mmc_get_dma_dir(data));
>>>> + struct sdmmc_idma *idma = host->dma_priv;
>>>> +
>>>> + if (idma->use_bounce_buffer) {
>>>> + if (data->flags & MMC_DATA_READ) {
>>>> + unsigned int xfer_bytes = data->blksz * data->blocks;
>>>> +
>>>> + sg_copy_from_buffer(data->sg, data->sg_len,
>>>> + idma->bounce_buf, xfer_bytes);
>>>> + }
>>>> + } else {
>>>> + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
>>>> + mmc_get_dma_dir(data));
>>>> + }
>>>> }
>>>>
>>>> static int sdmmc_idma_setup(struct mmci_host *host)
>>>> @@ -137,6 +165,16 @@ static int sdmmc_idma_setup(struct mmci_host *host)
>>>> host->mmc->max_segs = SDMMC_LLI_BUF_LEN /
>>>> sizeof(struct sdmmc_lli_desc);
>>>> host->mmc->max_seg_size = host->variant->stm32_idmabsize_mask;
>>>> +
>>>> + host->mmc->max_req_size = SZ_1M;
>>>> + idma->bounce_buf = dmam_alloc_coherent(dev,
>>>> + host->mmc->max_req_size,
>>>> + &idma->bounce_dma_addr,
>>>> + GFP_KERNEL);
>>>> + if (!idma->bounce_buf) {
>>>> + dev_err(dev, "Unable to map allocate DMA bounce buffer.\n");
>>>> + return -ENOMEM;
>>>
>> Hi Ulf,
>>
>>> If we fail to allocate the 1M bounce buffer, then we end up always
>>> using a PIO based mode, right?
>>>
>>> Perhaps we can allow the above allocation to fail, but then limit us
>>> to use DMA only when the buffers are properly aligned? Would that
>>> work?
>>>
>> We have never supported PIO mode with the STM32 variant.
>> We only support DMA single buffer or DMA LLI.
>> As we cannot use DMA LLI for unaligned accesses, we'll default to single
>> buffer mode.
>
> Right, I was looking at the legacy variant, which uses PIO as
> fallback. Sorry for my ignorance.
>
>> If allocation fails, it then won't work.
>
> Right, but that's only part of the issue, I think.
>
>> Maybe we shouldn't fail here, and just check idma->bounce_buf in the
>> validate data function. If the buffer is not allocated, we just return
>> -EINVAL, as was done before.
>
> Yes, something along those lines. However, there is another problem
> too, which is that the allocation will be done for each instance of
> the host that is probed. In all cases but the SDIO case, this would be
> a waste, right?
>
> Perhaps we should manage the allocation in the validate function too
> (de-allocation should be handled at ->remove()). In this way, the
> buffer will only be allocated when it's actually needed. Yes, it would
> add a latency while serving the *first* request that has unaligned
> buffers, but I guess we can live with that?
>
Hi Ulf,
That makes sense; I'll rework the validate data function along those lines.
I'll push a new version soon.
Thanks,
Yann
>>
>> Best regards,
>> Yann
>
> Kind regards
> Uffe
>
>>
>>>> + }
>>>> } else {
>>>> host->mmc->max_segs = 1;
>>>> host->mmc->max_seg_size = host->mmc->max_req_size;
>>>> @@ -154,8 +192,16 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl)
>>>> struct scatterlist *sg;
>>>> int i;
>>>>
>>>> - if (!host->variant->dma_lli || data->sg_len == 1) {
>>>> - writel_relaxed(sg_dma_address(data->sg),
>>>> + if (!host->variant->dma_lli || data->sg_len == 1 ||
>>>> + idma->use_bounce_buffer) {
>>>> + u32 dma_addr;
>>>> +
>>>> + if (idma->use_bounce_buffer)
>>>> + dma_addr = idma->bounce_dma_addr;
>>>> + else
>>>> + dma_addr = sg_dma_address(data->sg);
>>>> +
>>>> + writel_relaxed(dma_addr,
>>>> host->base + MMCI_STM32_IDMABASE0R);
>>>> writel_relaxed(MMCI_STM32_IDMAEN,
>>>> host->base + MMCI_STM32_IDMACTRLR);
>>>
>>> Kind regards
>>> Uffe
>>
Powered by blists - more mailing lists