Message-ID: <0f2e18a9-0a58-43fe-99b2-f5010abf8ce5@vivo.com>
Date: Tue, 20 Aug 2024 09:41:52 +0800
From: Huan Yang <link@...o.com>
To: "Kasireddy, Vivek" <vivek.kasireddy@...el.com>,
 Gerd Hoffmann <kraxel@...hat.com>, Sumit Semwal <sumit.semwal@...aro.org>,
 Christian König <christian.koenig@....com>,
 "dri-devel@...ts.freedesktop.org" <dri-devel@...ts.freedesktop.org>,
 "linux-media@...r.kernel.org" <linux-media@...r.kernel.org>,
 "linaro-mm-sig@...ts.linaro.org" <linaro-mm-sig@...ts.linaro.org>,
 "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Cc: "opensource.kernel@...o.com" <opensource.kernel@...o.com>
Subject: Re: [PATCH v3 5/5] udmabuf: remove udmabuf_folio


On 2024/8/17 9:05, Kasireddy, Vivek wrote:
> Hi Huan,
>
>> Currently, udmabuf handles folios by creating an unpin list that records
>> each folio obtained during creation and unpins them when released. Several
>> data structures exist solely to maintain this approach.
>>
>> However, maintaining these data structures requires a significant amount
>> of memory, and traversing the list is a substantial overhead that is not
>> friendly to the CPU cache.
>>
>> During creation we arrange the folio array in pin order and set each
>> offset according to pgcnt.
>>
>> So we don't actually need unpin_list to unpin during release. Instead,
>> we can iterate through the folios array on release and unpin any folio
>> that is different from the one previously visited.
> No, that won't work because iterating the folios array doesn't tell you
> anything about how many times a folio was pinned (via memfd_pin_folios()),
> as a folio could be part of multiple ranges.
>
> For example, if userspace provides ranges 64..128 and 256..512 (assuming
> these are 4k sized subpage offsets and we have a 2MB hugetlb folio), then
> the same folio would cover both ranges and there would be 2 entries for
> this folio in unpin_list. But, with your logic, we'd be erroneously unpinning
> it only once.
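
For reference, here is a minimal userspace-style model of that example (the
range sizes and the pin/unpin counters are purely illustrative, not the kernel
API): both ranges are backed by the same 2MB folio, so comparing each entry
with the previous one collapses them into a single unpin even though
memfd_pin_folios() pinned the folio once per range.

#include <stdio.h>

#define PAGES_RANGE_A  64	/* 4k pages contributed by range 64..128  */
#define PAGES_RANGE_B 256	/* 4k pages contributed by range 256..512 */

int main(void)
{
	/* one 2MB hugetlb folio backs both ranges */
	const void *huge_folio = (const void *)0x1000;
	const void *folios[PAGES_RANGE_A + PAGES_RANGE_B];
	const void *last = NULL;
	int i, pins = 0, unpins = 0;

	/* creation: each range pins the same folio once */
	for (i = 0; i < PAGES_RANGE_A; i++)
		folios[i] = huge_folio;
	pins++;
	for (; i < PAGES_RANGE_A + PAGES_RANGE_B; i++)
		folios[i] = huge_folio;
	pins++;

	/* release with the "skip folios equal to the previous one" logic */
	for (i = 0; i < PAGES_RANGE_A + PAGES_RANGE_B; i++) {
		if (folios[i] == last)
			continue;
		last = folios[i];
		unpins++;
	}

	printf("pins=%d unpins=%d\n", pins, unpins); /* prints pins=2 unpins=1 */
	return 0;
}
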
:( This is more complex than I thought. I misunderstood it, thank you for pointing this out.
>
> Not sure if there are any great solutions available to address this situation,
> but one option I can think of is to convert unpin_list to unpin array (dynamically
> resized with krealloc?) and track its length separately. Or, as suggested earlier,

Maybe just use a folio array of size pagecount and record each folio in it,
the way the unpin list does?

Even if that wastes some memory, array access will be faster than walking the
list (and the memory overhead is lower than unpin_list's). A rough sketch of
this array-based idea is below, after the quoted reply.

> another way is to not use unpin_list for memfds backed by shmem, but I suspect
> this may not work if THP is enabled.
>
> Thanks,
> Vivek
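
For reference, a rough sketch of the array-with-length idea mentioned above
(the struct and helper names here are invented for illustration and are not
part of the posted patch; a pagecount-sized array allocated up front would
look the same minus the krealloc_array() growth step):

#include <linux/slab.h>
#include <linux/mm.h>

struct udmabuf_pins {
	struct folio **pinned_folios;	/* one entry per memfd_pin_folios() pin */
	pgoff_t nr_pinned;
	pgoff_t capacity;
};

static int record_pinned_folio(struct udmabuf_pins *p, struct folio *folio)
{
	if (p->nr_pinned == p->capacity) {
		pgoff_t new_cap = p->capacity ? p->capacity * 2 : 16;
		struct folio **tmp;

		tmp = krealloc_array(p->pinned_folios, new_cap,
				     sizeof(*tmp), GFP_KERNEL);
		if (!tmp)
			return -ENOMEM;
		p->pinned_folios = tmp;
		p->capacity = new_cap;
	}
	p->pinned_folios[p->nr_pinned++] = folio;
	return 0;
}

static void unpin_recorded_folios(struct udmabuf_pins *p)
{
	pgoff_t i;

	for (i = 0; i < p->nr_pinned; i++)
		unpin_folio(p->pinned_folios[i]);
	kfree(p->pinned_folios);
}

Doubling the capacity keeps the number of reallocations logarithmic in the
number of pins, and the number of pinned folios (not pagecount) bounds the
array size.
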
>
>> This not only saves the overhead of the udmabuf_folio data structure
>> but also makes the array accesses more cache-friendly.
>>
>> Signed-off-by: Huan Yang <link@...o.com>
>> ---
>>   drivers/dma-buf/udmabuf.c | 68 +++++++++++++++++----------------------
>>   1 file changed, 30 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
>> index 8f9cb0e2e71a..1e7f46c33d1a 100644
>> --- a/drivers/dma-buf/udmabuf.c
>> +++ b/drivers/dma-buf/udmabuf.c
>> @@ -26,16 +26,19 @@ MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. Default is
>>
>>   struct udmabuf {
>>   	pgoff_t pagecount;
>> -	struct folio **folios;
>>   	struct sg_table *sg;
>>   	struct miscdevice *device;
>> +	struct folio **folios;
>> +	/**
>> +	 * Byte offset into the folio at the same index in the folios array.
>> +	 * udmabuf can be backed by shmem or hugetlb pages; an array based on
>> +	 * pages may not be suitable.
>> +	 * In particular, when HVO is enabled the tail pages are released,
>> +	 * so a reference to the page would no longer be correct.
>> +	 * Hence, it is necessary to record the offset in order to reference
>> +	 * the correct PFN within the folio.
>> +	 */
>>   	pgoff_t *offsets;
>> -	struct list_head unpin_list;
>> -};
>> -
>> -struct udmabuf_folio {
>> -	struct folio *folio;
>> -	struct list_head list;
>>   };
>>
>>   static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
>> @@ -160,32 +163,28 @@ static void unmap_udmabuf(struct dma_buf_attachment *at,
>>   	return put_sg_table(at->dev, sg, direction);
>>   }
>>
>> -static void unpin_all_folios(struct list_head *unpin_list)
>> +/**
>> + * unpin_all_folios() - unpin each folio pinned during create
>> + * udmabuf records every folio in the folios array and pins it, but for a
>> + * large folio we may use only a small portion of its physical memory.
>> + * The same folio is written repeatedly and sequentially into the array so
>> + * that each offset indexes the correct folio at the corresponding index.
>> + * Hence, we only need to unpin the first occurrence of each folio.
>> + */
>> +static void unpin_all_folios(struct udmabuf *ubuf)
>>   {
>> -	struct udmabuf_folio *ubuf_folio;
>> -
>> -	while (!list_empty(unpin_list)) {
>> -		ubuf_folio = list_first_entry(unpin_list,
>> -					      struct udmabuf_folio, list);
>> -		unpin_folio(ubuf_folio->folio);
>> -
>> -		list_del(&ubuf_folio->list);
>> -		kfree(ubuf_folio);
>> -	}
>> -}
>> +	pgoff_t pg;
>> +	struct folio *last = NULL;
>>
>> -static int add_to_unpin_list(struct list_head *unpin_list,
>> -			     struct folio *folio)
>> -{
>> -	struct udmabuf_folio *ubuf_folio;
>> +	for (pg = 0; pg < ubuf->pagecount; pg++) {
>> +		struct folio *tmp = ubuf->folios[pg];
>>
>> -	ubuf_folio = kzalloc(sizeof(*ubuf_folio), GFP_KERNEL);
>> -	if (!ubuf_folio)
>> -		return -ENOMEM;
>> +		if (tmp == last)
>> +			continue;
>>
>> -	ubuf_folio->folio = folio;
>> -	list_add_tail(&ubuf_folio->list, unpin_list);
>> -	return 0;
>> +		last = tmp;
>> +		unpin_folio(tmp);
>> +	}
>>   }
>>
>>   static void release_udmabuf(struct dma_buf *buf)
>> @@ -196,7 +195,7 @@ static void release_udmabuf(struct dma_buf *buf)
>>   	if (ubuf->sg)
>>   		put_sg_table(dev, ubuf->sg, DMA_BIDIRECTIONAL);
>>
>> -	unpin_all_folios(&ubuf->unpin_list);
>> +	unpin_all_folios(ubuf);
>>   	kvfree(ubuf->offsets);
>>   	kvfree(ubuf->folios);
>>   	kfree(ubuf);
>> @@ -308,7 +307,6 @@ static long udmabuf_create(struct miscdevice *device,
>>   	if (!ubuf)
>>   		return -ENOMEM;
>>
>> -	INIT_LIST_HEAD(&ubuf->unpin_list);
>>   	pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT;
>>   	for (i = 0; i < head->count; i++) {
>>   		if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
>> @@ -366,12 +364,6 @@ static long udmabuf_create(struct miscdevice *device,
>>   			u32 k;
>>   			long fsize = folio_size(folios[j]);
>>
>> -			ret = add_to_unpin_list(&ubuf->unpin_list, folios[j]);
>> -			if (ret < 0) {
>> -				kfree(folios);
>> -				goto err;
>> -			}
>> -
>>   			for (k = pgoff; k < fsize; k += PAGE_SIZE) {
>>   				ubuf->folios[pgbuf] = folios[j];
>>   				ubuf->offsets[pgbuf] = k;
>> @@ -399,7 +391,7 @@ static long udmabuf_create(struct miscdevice *device,
>>   err:
>>   	if (memfd)
>>   		fput(memfd);
>> -	unpin_all_folios(&ubuf->unpin_list);
>> +	unpin_all_folios(ubuf);
>>   	kvfree(ubuf->offsets);
>>   	kvfree(ubuf->folios);
>>   	kfree(ubuf);
>> --
>> 2.45.2
