Message-ID: <0f2e18a9-0a58-43fe-99b2-f5010abf8ce5@vivo.com>
Date: Tue, 20 Aug 2024 09:41:52 +0800
From: Huan Yang <link@...o.com>
To: "Kasireddy, Vivek" <vivek.kasireddy@...el.com>,
Gerd Hoffmann <kraxel@...hat.com>, Sumit Semwal <sumit.semwal@...aro.org>,
Christian König <christian.koenig@....com>,
"dri-devel@...ts.freedesktop.org" <dri-devel@...ts.freedesktop.org>,
"linux-media@...r.kernel.org" <linux-media@...r.kernel.org>,
"linaro-mm-sig@...ts.linaro.org" <linaro-mm-sig@...ts.linaro.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Cc: "opensource.kernel@...o.com" <opensource.kernel@...o.com>
Subject: Re: [PATCH v3 5/5] udmabuf: remove udmabuf_folio
On 2024/8/17 9:05, Kasireddy, Vivek wrote:
> Hi Huan,
>
>> Currently, udmabuf handles folios by keeping an unpin list that records
>> each pinned folio and unpins them when the buffer is released. Several
>> data structures have been added just to maintain this approach.
>>
>> However, maintaining these data structures costs a significant amount
>> of memory, and traversing the list is a substantial overhead that is
>> not friendly to the CPU cache.
>>
>> During creation we already arrange the folio array in pin order and
>> set the offsets according to pgcnt.
>>
>> So we don't actually need unpin_list to unpin during release.
>> Instead, we can iterate through the folios array on release and
>> unpin each folio that differs from the previously visited one.
> No, that won't work because iterating the folios array doesn't tell you
> anything about how many times a folio was pinned (via memfd_pin_folios()),
> as a folio could be part of multiple ranges.
>
> For example, if userspace provides ranges 64..128 and 256..512 (assuming
> these are 4k sized subpage offsets and we have a 2MB hugetlb folio), then
> the same folio would cover both ranges and there would be 2 entries for
> this folio in unpin_list. But, with your logic, we'd be erroneously unpinning
> it only once.
:(, this is more complex than I thought. I misunderstood it, thank you.
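
To make sure I follow now: for that example, udmabuf_create() ends up
doing roughly this (a rough sketch only, reusing names from
udmabuf_create() and assuming the memfd_pin_folios() signature from
this series):

	/* range 64..128: pins the 2MB hugetlb folio once */
	memfd_pin_folios(memfd, 64 * PAGE_SIZE, 128 * PAGE_SIZE - 1,
			 folios, pgcnt, &pgoff);
	/* range 256..512: the same folio is returned and pinned again */
	memfd_pin_folios(memfd, 256 * PAGE_SIZE, 512 * PAGE_SIZE - 1,
			 folios, pgcnt, &pgoff);
	/*
	 * unpin_list gets one entry per pin, so release must call
	 * unpin_folio() on this folio twice; a scan of ubuf->folios
	 * that skips repeated folios would unpin it only once.
	 */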
>
> Not sure if there are any great solutions available to address this situation,
> but one option I can think of is to convert unpin_list to unpin array (dynamically
> resized with krealloc?) and track its length separately. Or, as suggested earlier,
Maybe just use a folio array (sized by pagecount) and record each pinned
folio in it, like the unpin list does? Even if that wastes some memory,
array access will be faster than the list (and the overhead is still
lower than unpin_list's).
> another way is to not use unpin_list for memfds backed by shmem, but I suspect
> this may not work if THP is enabled.
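
Maybe something like this then, for the array option (just a rough
sketch; add_to_unpin_array(), pinned_folios[] and pinned_count are
made-up names, and growing one entry at a time with krealloc_array()
is only to keep the idea simple):

	struct udmabuf {
		/* ... existing members ... */
		struct folio **pinned_folios;	/* one entry per pin */
		pgoff_t pinned_count;
	};

	static int add_to_unpin_array(struct udmabuf *ubuf, struct folio *folio)
	{
		struct folio **tmp;

		tmp = krealloc_array(ubuf->pinned_folios, ubuf->pinned_count + 1,
				     sizeof(*tmp), GFP_KERNEL);
		if (!tmp)
			return -ENOMEM;

		tmp[ubuf->pinned_count++] = folio;
		ubuf->pinned_folios = tmp;
		return 0;
	}

	static void unpin_all_folios(struct udmabuf *ubuf)
	{
		pgoff_t i;

		/*
		 * One unpin per recorded pin, so a folio that backs
		 * several ranges is unpinned the correct number of times.
		 */
		for (i = 0; i < ubuf->pinned_count; i++)
			unpin_folio(ubuf->pinned_folios[i]);

		kfree(ubuf->pinned_folios);
	}

udmabuf_create() would then call add_to_unpin_array() once per folio
returned by memfd_pin_folios(), in place of add_to_unpin_list(); or, as
I said above, it could simply allocate pagecount entries up front
instead of using krealloc_array().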
>
> Thanks,
> Vivek
>
>> This not only saves the overhead of the udmabuf_folio data structure
>> but also makes array access more cache-friendly.
>>
>> Signed-off-by: Huan Yang <link@...o.com>
>> ---
>> drivers/dma-buf/udmabuf.c | 68 +++++++++++++++++----------------------
>> 1 file changed, 30 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
>> index 8f9cb0e2e71a..1e7f46c33d1a 100644
>> --- a/drivers/dma-buf/udmabuf.c
>> +++ b/drivers/dma-buf/udmabuf.c
>> @@ -26,16 +26,19 @@ MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is
>>
>> struct udmabuf {
>> pgoff_t pagecount;
>> - struct folio **folios;
>> struct sg_table *sg;
>> struct miscdevice *device;
>> + struct folio **folios;
>> + /**
>> + * Byte offset into the corresponding folio in the folios array.
>> + * udmabuf can be backed by either shmem or hugetlb pages, so an array
>> + * based on pages may not be suitable.
>> + * In particular, when HVO is enabled the tail struct pages are freed,
>> + * so a reference to a tail page would no longer be correct.
>> + * Hence, it is necessary to record the offset in order to reference
>> + * the correct PFN within the folio.
>> + */
>> pgoff_t *offsets;
>> - struct list_head unpin_list;
>> -};
>> -
>> -struct udmabuf_folio {
>> - struct folio *folio;
>> - struct list_head list;
>> };
>>
>> static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
>> @@ -160,32 +163,28 @@ static void unmap_udmabuf(struct dma_buf_attachment *at,
>> return put_sg_table(at->dev, sg, direction);
>> }
>>
>> -static void unpin_all_folios(struct list_head *unpin_list)
>> +/**
>> + * unpin_all_folios - unpin each folio pinned during create
>> + * udmabuf records every pinned folio in the folios array, but for a large
>> + * folio we may use only a small portion of its physical pages.
>> + * The same folio is stored repeatedly, in sequence, so that each offset
>> + * indexes the correct folio at the corresponding array position.
>> + * Hence, only the first occurrence of each folio needs to be unpinned.
>> + */
>> +static void unpin_all_folios(struct udmabuf *ubuf)
>> {
>> - struct udmabuf_folio *ubuf_folio;
>> -
>> - while (!list_empty(unpin_list)) {
>> - ubuf_folio = list_first_entry(unpin_list,
>> - struct udmabuf_folio, list);
>> - unpin_folio(ubuf_folio->folio);
>> -
>> - list_del(&ubuf_folio->list);
>> - kfree(ubuf_folio);
>> - }
>> -}
>> + pgoff_t pg;
>> + struct folio *last = NULL;
>>
>> -static int add_to_unpin_list(struct list_head *unpin_list,
>> - struct folio *folio)
>> -{
>> - struct udmabuf_folio *ubuf_folio;
>> + for (pg = 0; pg < ubuf->pagecount; pg++) {
>> + struct folio *tmp = ubuf->folios[pg];
>>
>> - ubuf_folio = kzalloc(sizeof(*ubuf_folio), GFP_KERNEL);
>> - if (!ubuf_folio)
>> - return -ENOMEM;
>> + if (tmp == last)
>> + continue;
>>
>> - ubuf_folio->folio = folio;
>> - list_add_tail(&ubuf_folio->list, unpin_list);
>> - return 0;
>> + last = tmp;
>> + unpin_folio(tmp);
>> + }
>> }
>>
>> static void release_udmabuf(struct dma_buf *buf)
>> @@ -196,7 +195,7 @@ static void release_udmabuf(struct dma_buf *buf)
>> if (ubuf->sg)
>> put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
>>
>> - unpin_all_folios(&ubuf->unpin_list);
>> + unpin_all_folios(ubuf);
>> kvfree(ubuf->offsets);
>> kvfree(ubuf->folios);
>> kfree(ubuf);
>> @@ -308,7 +307,6 @@ static long udmabuf_create(struct miscdevice *device,
>> if (!ubuf)
>> return -ENOMEM;
>>
>> - INIT_LIST_HEAD(&ubuf->unpin_list);
>> pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
>> for (i = 0; i < head->count; i++) {
>> if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
>> @@ -366,12 +364,6 @@ static long udmabuf_create(struct miscdevice *device,
>> u32 k;
>> long fsize = folio_size(folios[j]);
>>
>> - ret = add_to_unpin_list(&ubuf->unpin_list, folios[j]);
>> - if (ret < 0) {
>> - kfree(folios);
>> - goto err;
>> - }
>> -
>> for (k = pgoff; k < fsize; k += PAGE_SIZE) {
>> ubuf->folios[pgbuf] = folios[j];
>> ubuf->offsets[pgbuf] = k;
>> @@ -399,7 +391,7 @@ static long udmabuf_create(struct miscdevice *device,
>> err:
>> if (memfd)
>> fput(memfd);
>> - unpin_all_folios(&ubuf->unpin_list);
>> + unpin_all_folios(ubuf);
>> kvfree(ubuf->offsets);
>> kvfree(ubuf->folios);
>> kfree(ubuf);
>> --
>> 2.45.2