lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <VI1PR04MB49607013A9221E3D3657748492160@VI1PR04MB4960.eurprd04.prod.outlook.com>
Date:   Tue, 27 Oct 2020 12:30:35 +0000
From:   Sherry Sun <sherry.sun@....com>
To:     Christoph Hellwig <hch@...radead.org>
CC:     "gregkh@...uxfoundation.org" <gregkh@...uxfoundation.org>,
        "sudeep.dutt@...el.com" <sudeep.dutt@...el.com>,
        "ashutosh.dixit@...el.com" <ashutosh.dixit@...el.com>,
        "arnd@...db.de" <arnd@...db.de>, "kishon@...com" <kishon@...com>,
        "lorenzo.pieralisi@....com" <lorenzo.pieralisi@....com>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "linux-pci@...r.kernel.org" <linux-pci@...r.kernel.org>,
        dl-linux-imx <linux-imx@....com>, Andy Duan <fugang.duan@....com>
Subject: RE: [PATCH V4 1/2] misc: vop: change the way of allocating vring and
 device page

Hi Christoph,

Thank you for improving the code. I think your code makes vop_mmap looks clearer.
I have tested it on imx8qm platform(arm64 architecture) and it works well.
I will send the code together in v5.

Best regards
Sherry


> Subject: Re: [PATCH V4 1/2] misc: vop: change the way of allocating vring and
> device page
> 
> This looks much better, but we need to be careful to restore the original
> vm_start / vm_end.  What do you think about the version below, which also
> simplifies vop_mmap a lot (completely untested):
> 
> ---
> From 2de72bf7444ee187a7576962d746d482c5bdd593 Mon Sep 17 00:00:00
> 2001
> From: Sherry Sun <sherry.sun@....com>
> Date: Mon, 26 Oct 2020 16:53:34 +0800
> Subject: misc: vop: change the way of allocating vring and device page
> 
> Allocate vrings use dma_alloc_coherent is a common way in kernel. As the
> memory interacted between two systems should use consistent memory to
> avoid caching effects, same as device page memory.
> 
> The orginal way use __get_free_pages and dma_map_single to allocate and
> map vring, but not use dma_sync_single_for_cpu/device api to sync the
> changes of vring between EP and RC, which will cause memory
> synchronization problem for those devices which don't support hardware
> dma coherent.
> 
> Also change to use dma_mmap_coherent for mmap callback to map the
> device page and vring memory to userspace.
> 
> Signed-off-by: Joakim Zhang <qiangqing.zhang@....com>
> Signed-off-by: Sherry Sun <sherry.sun@....com>
> ---
>  drivers/misc/mic/bus/vop_bus.h    |   2 +
>  drivers/misc/mic/host/mic_boot.c  |   9 ++
>  drivers/misc/mic/host/mic_main.c  |  43 +++-------
> drivers/misc/mic/vop/vop_vringh.c | 135 ++++++++++++------------------
>  4 files changed, 77 insertions(+), 112 deletions(-)
> 
> diff --git a/drivers/misc/mic/bus/vop_bus.h
> b/drivers/misc/mic/bus/vop_bus.h index 4fa02808c1e27d..e21c06aeda7a31
> 100644
> --- a/drivers/misc/mic/bus/vop_bus.h
> +++ b/drivers/misc/mic/bus/vop_bus.h
> @@ -75,6 +75,7 @@ struct vop_driver {
>   *                 node to add/remove/configure virtio devices.
>   * @get_dp: Get access to the virtio device page used by the self
>   *          node to add/remove/configure virtio devices.
> + * @dp_mmap: Map the virtio device page to userspace.
>   * @send_intr: Send an interrupt to the peer node on a specified doorbell.
>   * @remap: Map a buffer with the specified DMA address and length.
>   * @unmap: Unmap a buffer previously mapped.
> @@ -92,6 +93,7 @@ struct vop_hw_ops {
>  	void (*ack_interrupt)(struct vop_device *vpdev, int num);
>  	void __iomem * (*get_remote_dp)(struct vop_device *vpdev);
>  	void * (*get_dp)(struct vop_device *vpdev);
> +	int (*dp_mmap)(struct vop_device *vpdev, struct vm_area_struct
> *vma);
>  	void (*send_intr)(struct vop_device *vpdev, int db);
>  	void __iomem * (*remap)(struct vop_device *vpdev,
>  				  dma_addr_t pa, size_t len);
> diff --git a/drivers/misc/mic/host/mic_boot.c
> b/drivers/misc/mic/host/mic_boot.c
> index 8cb85b8b3e199b..44ed918b49b4d2 100644
> --- a/drivers/misc/mic/host/mic_boot.c
> +++ b/drivers/misc/mic/host/mic_boot.c
> @@ -89,6 +89,14 @@ static void *__mic_get_dp(struct vop_device *vpdev)
>  	return mdev->dp;
>  }
> 
> +static int __mic_dp_mmap(struct vop_device *vpdev, struct
> +vm_area_struct *vma) {
> +	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
> +
> +	return dma_mmap_coherent(&mdev->pdev->dev, vma, mdev->dp,
> +				 mdev->dp_dma_addr, MIC_DP_SIZE);
> +}
> +
>  static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev)  {
>  	return NULL;
> @@ -120,6 +128,7 @@ static struct vop_hw_ops vop_hw_ops = {
>  	.ack_interrupt = __mic_ack_interrupt,
>  	.next_db = __mic_next_db,
>  	.get_dp = __mic_get_dp,
> +	.dp_mmap = __mic_dp_mmap,
>  	.get_remote_dp = __mic_get_remote_dp,
>  	.send_intr = __mic_send_intr,
>  	.remap = __mic_ioremap,
> diff --git a/drivers/misc/mic/host/mic_main.c
> b/drivers/misc/mic/host/mic_main.c
> index ea4608527ea04d..fab87a72a9a4a5 100644
> --- a/drivers/misc/mic/host/mic_main.c
> +++ b/drivers/misc/mic/host/mic_main.c
> @@ -10,6 +10,7 @@
>  #include <linux/module.h>
>  #include <linux/pci.h>
>  #include <linux/poll.h>
> +#include <linux/dma-mapping.h>
> 
>  #include <linux/mic_common.h>
>  #include "../common/mic_dev.h"
> @@ -45,33 +46,6 @@ MODULE_DEVICE_TABLE(pci, mic_pci_tbl);
>  /* ID allocator for MIC devices */
>  static struct ida g_mic_ida;
> 
> -/* Initialize the device page */
> -static int mic_dp_init(struct mic_device *mdev) -{
> -	mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL);
> -	if (!mdev->dp)
> -		return -ENOMEM;
> -
> -	mdev->dp_dma_addr = mic_map_single(mdev,
> -		mdev->dp, MIC_DP_SIZE);
> -	if (mic_map_error(mdev->dp_dma_addr)) {
> -		kfree(mdev->dp);
> -		dev_err(&mdev->pdev->dev, "%s %d err %d\n",
> -			__func__, __LINE__, -ENOMEM);
> -		return -ENOMEM;
> -	}
> -	mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev-
> >dp_dma_addr);
> -	mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev-
> >dp_dma_addr >> 32);
> -	return 0;
> -}
> -
> -/* Uninitialize the device page */
> -static void mic_dp_uninit(struct mic_device *mdev) -{
> -	mic_unmap_single(mdev, mdev->dp_dma_addr, MIC_DP_SIZE);
> -	kfree(mdev->dp);
> -}
> -
>  /**
>   * mic_ops_init: Initialize HW specific operation tables.
>   *
> @@ -227,11 +201,16 @@ static int mic_probe(struct pci_dev *pdev,
> 
>  	pci_set_drvdata(pdev, mdev);
> 
> -	rc = mic_dp_init(mdev);
> -	if (rc) {
> -		dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc);
> +	mdev->dp = dma_alloc_coherent(&pdev->dev, MIC_DP_SIZE,
> +				      &mdev->dp_dma_addr, GFP_KERNEL);
> +	if (!mdev->dp) {
> +		dev_err(&pdev->dev, "failed to allocate device page\n");
>  		goto smpt_uninit;
>  	}
> +
> +	mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev-
> >dp_dma_addr);
> +	mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev-
> >dp_dma_addr >> 32);
> +
>  	mic_bootparam_init(mdev);
>  	mic_create_debug_dir(mdev);
> 
> @@ -244,7 +223,7 @@ static int mic_probe(struct pci_dev *pdev,
>  	return 0;
>  cleanup_debug_dir:
>  	mic_delete_debug_dir(mdev);
> -	mic_dp_uninit(mdev);
> +	dma_free_coherent(&pdev->dev, MIC_DP_SIZE, mdev->dp,
> +mdev->dp_dma_addr);
>  smpt_uninit:
>  	mic_smpt_uninit(mdev);
>  free_interrupts:
> @@ -283,7 +262,7 @@ static void mic_remove(struct pci_dev *pdev)
> 
>  	cosm_unregister_device(mdev->cosm_dev);
>  	mic_delete_debug_dir(mdev);
> -	mic_dp_uninit(mdev);
> +	dma_free_coherent(&pdev->dev, MIC_DP_SIZE, mdev->dp,
> +mdev->dp_dma_addr);
>  	mic_smpt_uninit(mdev);
>  	mic_free_interrupts(mdev, pdev);
>  	iounmap(mdev->aper.va);
> diff --git a/drivers/misc/mic/vop/vop_vringh.c
> b/drivers/misc/mic/vop/vop_vringh.c
> index 7014ffe88632e5..6835648d577d57 100644
> --- a/drivers/misc/mic/vop/vop_vringh.c
> +++ b/drivers/misc/mic/vop/vop_vringh.c
> @@ -298,9 +298,8 @@ static int vop_virtio_add_device(struct vop_vdev
> *vdev,
>  		mutex_init(&vvr->vr_mutex);
>  		vr_size = PAGE_ALIGN(round_up(vring_size(num,
> MIC_VIRTIO_RING_ALIGN), 4) +
>  			sizeof(struct _mic_vring_info));
> -		vr->va = (void *)
> -			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
> -					 get_order(vr_size));
> +		vr->va = dma_alloc_coherent(vop_dev(vdev), vr_size,
> &vr_addr,
> +					    GFP_KERNEL);
>  		if (!vr->va) {
>  			ret = -ENOMEM;
>  			dev_err(vop_dev(vdev), "%s %d err %d\n", @@ -
> 310,15 +309,6 @@ static int vop_virtio_add_device(struct vop_vdev *vdev,
>  		vr->len = vr_size;
>  		vr->info = vr->va + round_up(vring_size(num,
> MIC_VIRTIO_RING_ALIGN), 4);
>  		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id
> + i);
> -		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
> -					 DMA_BIDIRECTIONAL);
> -		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
> -			free_pages((unsigned long)vr->va,
> get_order(vr_size));
> -			ret = -ENOMEM;
> -			dev_err(vop_dev(vdev), "%s %d err %d\n",
> -				__func__, __LINE__, ret);
> -			goto err;
> -		}
>  		vqconfig[i].address = cpu_to_le64(vr_addr);
> 
>  		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
> @@ -339,11 +329,9 @@ static int vop_virtio_add_device(struct vop_vdev
> *vdev,
>  		dev_dbg(&vpdev->dev,
>  			"%s %d index %d va %p info %p vr_size 0x%x\n",
>  			__func__, __LINE__, i, vr->va, vr->info, vr_size);
> -		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
> -					get_order(VOP_INT_DMA_BUF_SIZE));
> -		vvr->buf_da = dma_map_single(&vpdev->dev,
> -					  vvr->buf, VOP_INT_DMA_BUF_SIZE,
> -					  DMA_BIDIRECTIONAL);
> +		vvr->buf = dma_alloc_coherent(vop_dev(vdev),
> +					      VOP_INT_DMA_BUF_SIZE,
> +					      &vvr->buf_da, GFP_KERNEL);
>  	}
> 
>  	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
> @@ -382,10 +370,8 @@ static int vop_virtio_add_device(struct vop_vdev
> *vdev,
>  	for (j = 0; j < i; j++) {
>  		struct vop_vringh *vvr = &vdev->vvr[j];
> 
> -		dma_unmap_single(&vpdev->dev,
> le64_to_cpu(vqconfig[j].address),
> -				 vvr->vring.len, DMA_BIDIRECTIONAL);
> -		free_pages((unsigned long)vvr->vring.va,
> -			   get_order(vvr->vring.len));
> +		dma_free_coherent(vop_dev(vdev), vvr->vring.len, vvr-
> >vring.va,
> +				  le64_to_cpu(vqconfig[j].address));
>  	}
>  	return ret;
>  }
> @@ -433,17 +419,12 @@ static void vop_virtio_del_device(struct vop_vdev
> *vdev)
>  	for (i = 0; i < vdev->dd->num_vq; i++) {
>  		struct vop_vringh *vvr = &vdev->vvr[i];
> 
> -		dma_unmap_single(&vpdev->dev,
> -				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
> -				 DMA_BIDIRECTIONAL);
> -		free_pages((unsigned long)vvr->buf,
> -			   get_order(VOP_INT_DMA_BUF_SIZE));
> +		dma_free_coherent(vop_dev(vdev),
> VOP_INT_DMA_BUF_SIZE,
> +				  vvr->buf, vvr->buf_da);
>  		vringh_kiov_cleanup(&vvr->riov);
>  		vringh_kiov_cleanup(&vvr->wiov);
> -		dma_unmap_single(&vpdev->dev,
> le64_to_cpu(vqconfig[i].address),
> -				 vvr->vring.len, DMA_BIDIRECTIONAL);
> -		free_pages((unsigned long)vvr->vring.va,
> -			   get_order(vvr->vring.len));
> +		dma_free_coherent(vop_dev(vdev), vvr->vring.len, vvr-
> >vring.va,
> +				  le64_to_cpu(vqconfig[i].address));
>  	}
>  	/*
>  	 * Order the type update with previous stores. This write barrier @@
> -1042,13 +1023,27 @@ static __poll_t vop_poll(struct file *f, poll_table *wait)
>  	return mask;
>  }
> 
> -static inline int
> -vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
> -		 unsigned long *size, unsigned long *pa)
> +/*
> + * Maps the device page and virtio rings to user space for readonly access.
> + */
> +static int vop_mmap(struct file *f, struct vm_area_struct *vma)
>  {
> -	struct vop_device *vpdev = vdev->vpdev;
> -	unsigned long start = MIC_DP_SIZE;
> -	int i;
> +	struct vop_vdev *vdev = f->private_data;
> +	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
> +	unsigned long orig_start = vma->vm_start;
> +	unsigned long orig_end = vma->vm_end;
> +	int err, i;
> +
> +	if (!vdev->vpdev->hw_ops->dp_mmap)
> +		return -EINVAL;
> +	if (vma->vm_pgoff)
> +		return -EINVAL;
> +	if (vma->vm_flags & VM_WRITE)
> +		return -EACCES;
> +
> +	err = vop_vdev_inited(vdev);
> +	if (err)
> +		return err;
> 
>  	/*
>  	 * MMAP interface is as follows:
> @@ -1057,58 +1052,38 @@ vop_query_offset(struct vop_vdev *vdev,
> unsigned long offset,
>  	 * 0x1000				first vring
>  	 * 0x1000 + size of 1st vring		second vring
>  	 * ....
> +	 *
> +	 * We manipulate vm_start/vm_end to trick dma_mmap_coherent
> into
> +	 * performing partial mappings, which is a bit of a hack, but safe
> +	 * while we are under mmap_lock().  Eventually this needs to be
> +	 * replaced by a proper DMA layer API.
>  	 */
> -	if (!offset) {
> -		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
> -		*size = MIC_DP_SIZE;
> -		return 0;
> -	}
> +	vma->vm_end = vma->vm_start + MIC_DP_SIZE;
> +	err = vdev->vpdev->hw_ops->dp_mmap(vdev->vpdev, vma);
> +	if (err)
> +		goto out;
> 
>  	for (i = 0; i < vdev->dd->num_vq; i++) {
>  		struct vop_vringh *vvr = &vdev->vvr[i];
> 
> -		if (offset == start) {
> -			*pa = virt_to_phys(vvr->vring.va);
> -			*size = vvr->vring.len;
> -			return 0;
> -		}
> -		start += vvr->vring.len;
> -	}
> -	return -1;
> -}
> -
> -/*
> - * Maps the device page and virtio rings to user space for readonly access.
> - */
> -static int vop_mmap(struct file *f, struct vm_area_struct *vma) -{
> -	struct vop_vdev *vdev = f->private_data;
> -	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
> -	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem =
> size;
> -	int i, err;
> +		vma->vm_start = vma->vm_end;
> +		vma->vm_end += vvr->vring.len;
> 
> -	err = vop_vdev_inited(vdev);
> -	if (err)
> -		goto ret;
> -	if (vma->vm_flags & VM_WRITE) {
> -		err = -EACCES;
> -		goto ret;
> -	}
> -	while (size_rem) {
> -		i = vop_query_offset(vdev, offset, &size, &pa);
> -		if (i < 0) {
> -			err = -EINVAL;
> -			goto ret;
> -		}
> -		err = remap_pfn_range(vma, vma->vm_start + offset,
> -				      pa >> PAGE_SHIFT, size,
> -				      vma->vm_page_prot);
> +		err = -EINVAL;
> +		if (vma->vm_end > orig_end)
> +			goto out;
> +		err = dma_mmap_coherent(vop_dev(vdev), vma, vvr-
> >vring.va,
> +					le64_to_cpu(vqconfig[i].address),
> +					vvr->vring.len);
>  		if (err)
> -			goto ret;
> -		size_rem -= size;
> -		offset += size;
> +			goto out;
>  	}
> -ret:
> +out:
> +	/*
> +	 * Restore the original vma parameters.
> +	 */
> +	vma->vm_start = orig_start;
> +	vma->vm_end = orig_end;
>  	return err;
>  }
> 
> --
> 2.28.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ