linux-kernel - Re: [PATCH 1/5] udmabuf: cancel mmap page fault, direct map it

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <46d00d9a-b86b-4e4d-863f-48d71060ef2a@amd.com>
Date: Thu, 1 Aug 2024 12:50:53 +0200
From: Christian König <christian.koenig@....com>
To: Huan Yang <link@...o.com>, Gerd Hoffmann <kraxel@...hat.com>,
 Sumit Semwal <sumit.semwal@...aro.org>, dri-devel@...ts.freedesktop.org,
 linux-media@...r.kernel.org, linaro-mm-sig@...ts.linaro.org,
 linux-kernel@...r.kernel.org
Cc: opensource.kernel@...o.com
Subject: Re: [PATCH 1/5] udmabuf: cancel mmap page fault, direct map it

Am 01.08.24 um 12:45 schrieb Huan Yang:
> The current udmabuf mmap uses a page fault mechanism to populate the vma.
>
> However, the current udmabuf has already obtained and pinned the folio
> upon completion of the creation.This means that the physical memory has
> already been acquired, rather than being accessed dynamically. The
> current page fault method only saves some page table memory.
>
> As a result, the page fault mechanism has lost its purpose as a demanding
> page. Due to the fact that page fault requires trapping into kernel mode
> and filling in when accessing the corresponding virtual address in mmap,
> this means that user mode access to virtual addresses needs to trap into
> kernel mode.
>
> Therefore, when creating a large size udmabuf, this represents a
> considerable overhead.
>
> Therefore, the current patch removes the page fault method of mmap and
> instead fills it directly when mmap is triggered.
>
> Signed-off-by: Huan Yang <link@...o.com>
> ---
>   drivers/dma-buf/udmabuf.c | 70 ++++++++++++++++++++++-----------------
>   1 file changed, 39 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> index 047c3cd2ceff..d69aeada7367 100644
> --- a/drivers/dma-buf/udmabuf.c
> +++ b/drivers/dma-buf/udmabuf.c
> @@ -38,36 +38,39 @@ struct udmabuf_folio {
>   	struct list_head list;
>   };
>   
> -static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
> -{
> -	struct vm_area_struct *vma = vmf->vma;
> -	struct udmabuf *ubuf = vma->vm_private_data;
> -	pgoff_t pgoff = vmf->pgoff;
> -	unsigned long pfn;
> -
> -	if (pgoff >= ubuf->pagecount)
> -		return VM_FAULT_SIGBUS;
> -
> -	pfn = folio_pfn(ubuf->folios[pgoff]);
> -	pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
> -
> -	return vmf_insert_pfn(vma, vmf->address, pfn);
> -}
> -
> -static const struct vm_operations_struct udmabuf_vm_ops = {
> -	.fault = udmabuf_vm_fault,
> -};
> +static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
> +				     enum dma_data_direction direction);
>   
>   static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
>   {
>   	struct udmabuf *ubuf = buf->priv;
> +	struct sg_table *table = ubuf->sg;
> +	unsigned long addr = vma->vm_start;
> +	struct sg_page_iter piter;
> +	int ret;
>   
>   	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
>   		return -EINVAL;
>   
> -	vma->vm_ops = &udmabuf_vm_ops;
> -	vma->vm_private_data = ubuf;
> -	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
> +	if (!table) {
> +		table = get_sg_table(NULL, buf, 0);
> +		if (IS_ERR(table))
> +			return PTR_ERR(table);
> +		ubuf->sg = table;
> +	}
> +
> +	for_each_sgtable_page(table, &piter, vma->vm_pgoff) {

That might not work correctly. We intentionally remove the pages from 
the sgtable when it is shared between devices.

Additional to that the sgtable is *not* a page container, but rather a 
DMA address container. So that here is also a rather bad idea from the 
design side.

Regards,
Christian.

> +		struct page *page = sg_page_iter_page(&piter);
> +
> +		ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE,
> +				      vma->vm_page_prot);
> +		if (ret)
> +			return ret;
> +		addr += PAGE_SIZE;
> +		if (addr >= vma->vm_end)
> +			return 0;
> +	}
> +
>   	return 0;
>   }
>   
> @@ -126,6 +129,10 @@ static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
>   		sg_set_folio(sgl, ubuf->folios[i], PAGE_SIZE,
>   			     ubuf->offsets[i]);
>   
> +	// if dev is NULL, no need to sync.
> +	if (!dev)
> +		return sg;
> +
>   	ret = dma_map_sgtable(dev, sg, direction, 0);
>   	if (ret < 0)
>   		goto err_map;
> @@ -206,20 +213,21 @@ static int begin_cpu_udmabuf(struct dma_buf *buf,
>   {
>   	struct udmabuf *ubuf = buf->priv;
>   	struct device *dev = ubuf->device->this_device;
> -	int ret = 0;
> +	struct sg_table *sg;
>   
> -	if (!ubuf->sg) {
> -		ubuf->sg = get_sg_table(dev, buf, direction);
> -		if (IS_ERR(ubuf->sg)) {
> -			ret = PTR_ERR(ubuf->sg);
> -			ubuf->sg = NULL;
> -		}
> -	} else {
> +	if (ubuf->sg) {
>   		dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents,
>   				    direction);
> +		return 0;
>   	}
>   
> -	return ret;
> +	sg = get_sg_table(dev, buf, direction);
> +	if (IS_ERR(sg))
> +		return PTR_ERR(sg);
> +
> +	ubuf->sg = sg;
> +
> +	return 0;
>   }
>   
>   static int end_cpu_udmabuf(struct dma_buf *buf,