lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aA_hm_AD4Xzm3qJZ@google.com>
Date: Mon, 28 Apr 2025 20:14:19 +0000
From: Pranjal Shrivastava <praan@...gle.com>
To: Nicolin Chen <nicolinc@...dia.com>
Cc: jgg@...dia.com, kevin.tian@...el.com, corbet@....net, will@...nel.org,
	bagasdotme@...il.com, robin.murphy@....com, joro@...tes.org,
	thierry.reding@...il.com, vdumpa@...dia.com, jonathanh@...dia.com,
	shuah@...nel.org, jsnitsel@...hat.com, nathan@...nel.org,
	peterz@...radead.org, yi.l.liu@...el.com, mshavit@...gle.com,
	zhangzekun11@...wei.com, iommu@...ts.linux.dev,
	linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org,
	linux-arm-kernel@...ts.infradead.org, linux-tegra@...r.kernel.org,
	linux-kselftest@...r.kernel.org, patches@...ts.linux.dev,
	mochs@...dia.com, alok.a.tiwari@...cle.com, vasant.hegde@....com
Subject: Re: [PATCH v2 08/22] iommufd: Abstract iopt_pin_pages and
 iopt_unpin_pages helpers

On Fri, Apr 25, 2025 at 10:58:03PM -0700, Nicolin Chen wrote:
> The new vCMDQ object will be added for HW to access the guest memory for a
> HW-accelerated virtualization feature. It needs to ensure the guest memory
> pages are pinned when HW accesses them and they are contiguous in physical
> address space.
> 
> This is very similar to the existing iommufd_access_pin_pages(), which
> outputs the pinned page list for the caller to test its contiguity.
> 
> Move that code from iommufd_access_pin/unpin_pages() and the related helper
> function into a pair of iopt helpers that can be shared with the vCMDQ
> allocator. As the vCMDQ allocator will be a user-space triggered ioctl
> function, WARN_ON would not be a good fit in the new iopt_unpin_pages(), so
> change those checks to use WARN_ON_ONCE instead.
> 
> Rename check_area_prot() to iopt_area_check_prot() to align with the
> existing iopt_area helpers, and move it into the header as a static inline
> since iommufd_access_rw() still uses it.
> 
> Signed-off-by: Nicolin Chen <nicolinc@...dia.com>
> ---
>  drivers/iommu/iommufd/io_pagetable.h    |   8 ++
>  drivers/iommu/iommufd/iommufd_private.h |   6 ++
>  drivers/iommu/iommufd/device.c          | 117 ++----------------------
>  drivers/iommu/iommufd/io_pagetable.c    |  95 +++++++++++++++++++
>  4 files changed, 117 insertions(+), 109 deletions(-)
> 
> diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
> index 10c928a9a463..4288a2b1a90f 100644
> --- a/drivers/iommu/iommufd/io_pagetable.h
> +++ b/drivers/iommu/iommufd/io_pagetable.h
> @@ -114,6 +114,14 @@ static inline unsigned long iopt_area_iova_to_index(struct iopt_area *area,
>  	return iopt_area_start_byte(area, iova) / PAGE_SIZE;
>  }
>  
> +static inline bool iopt_area_check_prot(struct iopt_area *area,
> +					unsigned int flags)
> +{
> +	if (flags & IOMMUFD_ACCESS_RW_WRITE)
> +		return area->iommu_prot & IOMMU_WRITE;
> +	return area->iommu_prot & IOMMU_READ;
> +}
> +
>  #define __make_iopt_iter(name)                                                 \
>  	static inline struct iopt_##name *iopt_##name##_iter_first(            \
>  		struct io_pagetable *iopt, unsigned long start,                \
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 8d96aa514033..79160b039bc7 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -130,6 +130,12 @@ int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
>  void iopt_enable_large_pages(struct io_pagetable *iopt);
>  int iopt_disable_large_pages(struct io_pagetable *iopt);
>  
> +int iopt_pin_pages(struct io_pagetable *iopt, unsigned long iova,
> +		   unsigned long length, struct page **out_pages,
> +		   unsigned int flags);
> +void iopt_unpin_pages(struct io_pagetable *iopt, unsigned long iova,
> +		      unsigned long length);
> +
>  struct iommufd_ucmd {
>  	struct iommufd_ctx *ictx;
>  	void __user *ubuffer;
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index 2111bad72c72..a5c6be164254 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -1240,58 +1240,17 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
>  void iommufd_access_unpin_pages(struct iommufd_access *access,
>  				unsigned long iova, unsigned long length)
>  {
> -	struct iopt_area_contig_iter iter;
> -	struct io_pagetable *iopt;
> -	unsigned long last_iova;
> -	struct iopt_area *area;
> -
> -	if (WARN_ON(!length) ||
> -	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
> -		return;
> -
> -	mutex_lock(&access->ioas_lock);
> +	guard(mutex)(&access->ioas_lock);
>  	/*
>  	 * The driver must be doing something wrong if it calls this before an
>  	 * iommufd_access_attach() or after an iommufd_access_detach().
>  	 */
> -	if (WARN_ON(!access->ioas_unpin)) {
> -		mutex_unlock(&access->ioas_lock);
> +	if (WARN_ON(!access->ioas_unpin))
>  		return;
> -	}
> -	iopt = &access->ioas_unpin->iopt;
> -
> -	down_read(&iopt->iova_rwsem);
> -	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
> -		iopt_area_remove_access(
> -			area, iopt_area_iova_to_index(area, iter.cur_iova),
> -			iopt_area_iova_to_index(
> -				area,
> -				min(last_iova, iopt_area_last_iova(area))));
> -	WARN_ON(!iopt_area_contig_done(&iter));
> -	up_read(&iopt->iova_rwsem);
> -	mutex_unlock(&access->ioas_lock);
> +	iopt_unpin_pages(&access->ioas_unpin->iopt, iova, length);
>  }
>  EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, "IOMMUFD");
>  
> -static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
> -{
> -	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
> -		return false;
> -
> -	if (!iopt_area_contig_done(iter) &&
> -	    (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
> -	     PAGE_SIZE) != (PAGE_SIZE - 1))
> -		return false;
> -	return true;
> -}
> -
> -static bool check_area_prot(struct iopt_area *area, unsigned int flags)
> -{
> -	if (flags & IOMMUFD_ACCESS_RW_WRITE)
> -		return area->iommu_prot & IOMMU_WRITE;
> -	return area->iommu_prot & IOMMU_READ;
> -}
> -
>  /**
>   * iommufd_access_pin_pages() - Return a list of pages under the iova
>   * @access: IOAS access to act on
> @@ -1315,76 +1274,16 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
>  			     unsigned long length, struct page **out_pages,
>  			     unsigned int flags)
>  {
> -	struct iopt_area_contig_iter iter;
> -	struct io_pagetable *iopt;
> -	unsigned long last_iova;
> -	struct iopt_area *area;
> -	int rc;
> -
>  	/* Driver's ops don't support pin_pages */
>  	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
>  	    WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
>  		return -EINVAL;
>  
> -	if (!length)
> -		return -EINVAL;
> -	if (check_add_overflow(iova, length - 1, &last_iova))
> -		return -EOVERFLOW;
> -
> -	mutex_lock(&access->ioas_lock);
> -	if (!access->ioas) {
> -		mutex_unlock(&access->ioas_lock);
> +	guard(mutex)(&access->ioas_lock);
> +	if (!access->ioas)
>  		return -ENOENT;
> -	}
> -	iopt = &access->ioas->iopt;
> -
> -	down_read(&iopt->iova_rwsem);
> -	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
> -		unsigned long last = min(last_iova, iopt_area_last_iova(area));
> -		unsigned long last_index = iopt_area_iova_to_index(area, last);
> -		unsigned long index =
> -			iopt_area_iova_to_index(area, iter.cur_iova);
> -
> -		if (area->prevent_access ||
> -		    !iopt_area_contig_is_aligned(&iter)) {
> -			rc = -EINVAL;
> -			goto err_remove;
> -		}
> -
> -		if (!check_area_prot(area, flags)) {
> -			rc = -EPERM;
> -			goto err_remove;
> -		}
> -
> -		rc = iopt_area_add_access(area, index, last_index, out_pages,
> -					  flags);
> -		if (rc)
> -			goto err_remove;
> -		out_pages += last_index - index + 1;
> -	}
> -	if (!iopt_area_contig_done(&iter)) {
> -		rc = -ENOENT;
> -		goto err_remove;
> -	}
> -
> -	up_read(&iopt->iova_rwsem);
> -	mutex_unlock(&access->ioas_lock);
> -	return 0;
> -
> -err_remove:
> -	if (iova < iter.cur_iova) {
> -		last_iova = iter.cur_iova - 1;
> -		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
> -			iopt_area_remove_access(
> -				area,
> -				iopt_area_iova_to_index(area, iter.cur_iova),
> -				iopt_area_iova_to_index(
> -					area, min(last_iova,
> -						  iopt_area_last_iova(area))));
> -	}
> -	up_read(&iopt->iova_rwsem);
> -	mutex_unlock(&access->ioas_lock);
> -	return rc;
> +	return iopt_pin_pages(&access->ioas->iopt, iova, length, out_pages,
> +			      flags);
>  }
>  EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, "IOMMUFD");
>  
> @@ -1431,7 +1330,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
>  			goto err_out;
>  		}
>  
> -		if (!check_area_prot(area, flags)) {
> +		if (!iopt_area_check_prot(area, flags)) {
>  			rc = -EPERM;
>  			goto err_out;
>  		}
> diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
> index 8a790e597e12..160eec49af1b 100644
> --- a/drivers/iommu/iommufd/io_pagetable.c
> +++ b/drivers/iommu/iommufd/io_pagetable.c
> @@ -1472,3 +1472,98 @@ int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
>  	up_write(&iopt->iova_rwsem);
>  	return rc;
>  }
> +
> +static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
> +{
> +	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
> +		return false;
> +
> +	if (!iopt_area_contig_done(iter) &&
> +	    (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
> +	     PAGE_SIZE) != (PAGE_SIZE - 1))
> +		return false;
> +	return true;
> +}
> +
> +int iopt_pin_pages(struct io_pagetable *iopt, unsigned long iova,
> +		   unsigned long length, struct page **out_pages,
> +		   unsigned int flags)
> +{
> +	struct iopt_area_contig_iter iter;
> +	unsigned long last_iova;
> +	struct iopt_area *area;
> +	int rc;
> +
> +	if (!length)
> +		return -EINVAL;
> +	if (check_add_overflow(iova, length - 1, &last_iova))
> +		return -EOVERFLOW;
> +
> +	down_read(&iopt->iova_rwsem);
> +	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
> +		unsigned long last = min(last_iova, iopt_area_last_iova(area));
> +		unsigned long last_index = iopt_area_iova_to_index(area, last);
> +		unsigned long index =
> +			iopt_area_iova_to_index(area, iter.cur_iova);
> +
> +		if (area->prevent_access ||

Nit:
Shouldn't we return -EBUSY (or something similar) when area->prevent_access
is set? IIUC, that flag just means that an unmap attempt is in progress,
hence we avoid accessing the area.

> +		    !iopt_area_contig_is_aligned(&iter)) {
> +			rc = -EINVAL;
> +			goto err_remove;
> +		}
> +
> +		if (!iopt_area_check_prot(area, flags)) {
> +			rc = -EPERM;
> +			goto err_remove;
> +		}
> +
> +		rc = iopt_area_add_access(area, index, last_index, out_pages,
> +					  flags);
> +		if (rc)
> +			goto err_remove;
> +		out_pages += last_index - index + 1;
> +	}
> +	if (!iopt_area_contig_done(&iter)) {
> +		rc = -ENOENT;
> +		goto err_remove;
> +	}
> +
> +	up_read(&iopt->iova_rwsem);
> +	return 0;
> +
> +err_remove:
> +	if (iova < iter.cur_iova) {
> +		last_iova = iter.cur_iova - 1;
> +		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
> +			iopt_area_remove_access(
> +				area,
> +				iopt_area_iova_to_index(area, iter.cur_iova),
> +				iopt_area_iova_to_index(
> +					area, min(last_iova,
> +						  iopt_area_last_iova(area))));
> +	}
> +	up_read(&iopt->iova_rwsem);
> +	return rc;
> +}
> +
> +void iopt_unpin_pages(struct io_pagetable *iopt, unsigned long iova,
> +		      unsigned long length)
> +{
> +	struct iopt_area_contig_iter iter;
> +	unsigned long last_iova;
> +	struct iopt_area *area;
> +
> +	if (WARN_ON_ONCE(!length) ||
> +	    WARN_ON_ONCE(check_add_overflow(iova, length - 1, &last_iova)))
> +		return;
> +
> +	down_read(&iopt->iova_rwsem);
> +	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
> +		iopt_area_remove_access(
> +			area, iopt_area_iova_to_index(area, iter.cur_iova),
> +			iopt_area_iova_to_index(
> +				area,
> +				min(last_iova, iopt_area_last_iova(area))));
> +	WARN_ON_ONCE(!iopt_area_contig_done(&iter));
> +	up_read(&iopt->iova_rwsem);
> +}
> -- 
> 2.43.0
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ