lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <27ba11cf6cb60c7a0056a00e71f22a3ea1e61714.camel@linux.ibm.com>
Date: Mon, 16 Sep 2024 17:53:57 +0200
From: Niklas Schnelle <schnelle@...ux.ibm.com>
To: Matthew Rosato <mjrosato@...ux.ibm.com>, joro@...tes.org, will@...nel.org,
        robin.murphy@....com, gerald.schaefer@...ux.ibm.com
Cc: jgg@...pe.ca, baolu.lu@...ux.intel.com, hca@...ux.ibm.com,
        gor@...ux.ibm.com, agordeev@...ux.ibm.com, svens@...ux.ibm.com,
        jroedel@...e.de, iommu@...ts.linux.dev, linux-kernel@...r.kernel.org,
        linux-s390@...r.kernel.org
Subject: Re: [PATCH v4] iommu/s390: Implement blocking domain

On Tue, 2024-09-10 at 17:15 -0400, Matthew Rosato wrote:
> This fixes a crash when surprise hot-unplugging a PCI device. This crash
> happens because during hot-unplug __iommu_group_set_domain_nofail()
> attaching the default domain fails when the platform no longer
> recognizes the device as it has already been removed and we end up with

Just to clarify my original wording here based on Jason's comments. The
device is already removed from the point of view of the platform
hypervisor not from the point of view of Linux that's why I phrased it
as the platform no longer recognizing it.

> a NULL domain pointer and UAF. This is exactly the case referred to in
> the second comment in __iommu_device_set_domain() and just as stated
> there if we can instead attach the blocking domain the UAF is prevented
> as this can handle the already removed device. Implement the blocking
> domain to use this handling.  With this change, the crash is fixed but
> we still hit a warning attempting to change DMA ownership on a blocked
> device.
> 
> Fixes: c76c067e488c ("s390/pci: Use dma-iommu layer")
> Co-developed-by: Niklas Schnelle <schnelle@...ux.ibm.com>
> Signed-off-by: Niklas Schnelle <schnelle@...ux.ibm.com>
> Signed-off-by: Matthew Rosato <mjrosato@...ux.ibm.com>
> ---
> Changes for v4:
> - fix lockdep assert
> Changes for v3:
> - make blocking_domain type iommu_domain
> - change zdev->s390_domain to type iommu_domain and remove most uses
> - remove s390_iommu_detach_device, use blocking domain attach
> - add spinlock to serialize zdev->s390_domain change / access to counters
> ---
>  arch/s390/include/asm/pci.h |  4 +-
>  arch/s390/pci/pci.c         |  3 ++
>  arch/s390/pci/pci_debug.c   | 10 ++++-
>  drivers/iommu/s390-iommu.c  | 73 +++++++++++++++++++++++--------------
>  4 files changed, 59 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
> index 30820a649e6e..a60a291fbd58 100644
> --- a/arch/s390/include/asm/pci.h
> +++ b/arch/s390/include/asm/pci.h
> @@ -96,7 +96,6 @@ struct zpci_bar_struct {
>  	u8		size;		/* order 2 exponent */
>  };
>  
> -struct s390_domain;
>  struct kvm_zdev;
>  
>  #define ZPCI_FUNCTIONS_PER_BUS 256
> @@ -181,9 +180,10 @@ struct zpci_dev {
>  	struct dentry	*debugfs_dev;
>  
>  	/* IOMMU and passthrough */
> -	struct s390_domain *s390_domain; /* s390 IOMMU domain data */
> +	struct iommu_domain *s390_domain; /* attached IOMMU domain */
>  	struct kvm_zdev *kzdev;
>  	struct mutex kzdev_lock;
> +	spinlock_t dom_lock;		/* protect s390_domain change */
>  };
>  
> 
---8<---
>  
> +static struct iommu_domain blocking_domain;
> +

I originally looked at using struct iommu_domain for the blocking
domain when I started on this as well but got scared away by having to
change it in more places. Turns out it's not actually that bad. And as
Jason said this better matches the other IOMMU drivers and overall
design.

>  static inline unsigned int calc_rtx(dma_addr_t ptr)
>  {
>  	return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
> @@ -369,20 +371,36 @@ static void s390_domain_free(struct iommu_domain *domain)
>  	call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
>  }
>  
> -static void s390_iommu_detach_device(struct iommu_domain *domain,
> -				     struct device *dev)
> +static void zdev_s390_domain_update(struct zpci_dev *zdev,
> +				    struct iommu_domain *domain)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&zdev->dom_lock, flags);
> +	zdev->s390_domain = domain;
> +	spin_unlock_irqrestore(&zdev->dom_lock, flags);
> +}
> +
> +static int blocking_domain_attach_device(struct iommu_domain *domain,
> +					 struct device *dev)
>  {
> -	struct s390_domain *s390_domain = to_s390_domain(domain);
>  	struct zpci_dev *zdev = to_zpci_dev(dev);
> +	struct s390_domain *s390_domain;
>  	unsigned long flags;
>  
> +	if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
> +		return 0;
> +
> +	s390_domain = to_s390_domain(zdev->s390_domain);
>  	spin_lock_irqsave(&s390_domain->list_lock, flags);
>  	list_del_rcu(&zdev->iommu_list);
>  	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
>  
>  	zpci_unregister_ioat(zdev, 0);
> -	zdev->s390_domain = NULL;
>  	zdev->dma_table = NULL;
> +	zdev_s390_domain_update(zdev, domain);
> +
> +	return 0;
>  }

I like that we get rid of s390_iommu_detach_device() really makes it
simpler to think of the DMA as blocked rather than no IOMMU being
attached.

>  
>  static int s390_iommu_attach_device(struct iommu_domain *domain,
> @@ -401,20 +419,15 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>  		domain->geometry.aperture_end < zdev->start_dma))
>  		return -EINVAL;
>  
> -	if (zdev->s390_domain)
> -		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
> +	blocking_domain_attach_device(&blocking_domain, dev);
>  
> +	/* If we fail now DMA remains blocked via blocking domain */
>  	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
>  				virt_to_phys(s390_domain->dma_table), &status);
> -	/*
> -	 * If the device is undergoing error recovery the reset code
> -	 * will re-establish the new domain.
> -	 */
>  	if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
>  		return -EIO;
> -
>  	zdev->dma_table = s390_domain->dma_table;
> -	zdev->s390_domain = s390_domain;
> +	zdev_s390_domain_update(zdev, domain);
>  
>  	spin_lock_irqsave(&s390_domain->list_lock, flags);
>  	list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
> @@ -466,19 +479,11 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
>  	if (zdev->tlb_refresh)
>  		dev->iommu->shadow_on_flush = 1;
>  
> -	return &zdev->iommu_dev;
> -}
> +	/* Start with DMA blocked */
> +	spin_lock_init(&zdev->dom_lock);
> +	zdev_s390_domain_update(zdev, &blocking_domain);

Thanks for incorporating this suggestion from Jason, makes sense to
start out as blocked especially since that matches the platform
behavior too.

>  
> -static void s390_iommu_release_device(struct device *dev)
> -{
> -	struct zpci_dev *zdev = to_zpci_dev(dev);
> -
> -	/*
> -	 * release_device is expected to detach any domain currently attached
> -	 * to the device, but keep it attached to other devices in the group.
> -	 */
> -	if (zdev)
> -		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
> +	return &zdev->iommu_dev;
>  }
>  
>  static int zpci_refresh_all(struct zpci_dev *zdev)
> @@ -697,9 +702,15 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
>  
>  struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
>  {
> -	if (!zdev || !zdev->s390_domain)
> +	struct s390_domain *s390_domain;
> +
> +	lockdep_assert_held(&zdev->dom_lock);
> +
> +	if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
>  		return NULL;
> -	return &zdev->s390_domain->ctrs;
> +
> +	s390_domain = to_s390_domain(zdev->s390_domain);
> +	return &s390_domain->ctrs;
>  }
>  
>  int zpci_init_iommu(struct zpci_dev *zdev)
> @@ -776,11 +787,19 @@ static int __init s390_iommu_init(void)
>  }
>  subsys_initcall(s390_iommu_init);
>  
> +static struct iommu_domain blocking_domain = {
> +	.type = IOMMU_DOMAIN_BLOCKED,
> +	.ops = &(const struct iommu_domain_ops) {
> +		.attach_dev	= blocking_domain_attach_device,
> +	}
> +};
> +
>  static const struct iommu_ops s390_iommu_ops = {
> +	.blocked_domain		= &blocking_domain,
> +	.release_domain		= &blocking_domain,
>  	.capable = s390_iommu_capable,
>  	.domain_alloc_paging = s390_domain_alloc_paging,
>  	.probe_device = s390_iommu_probe_device,
> -	.release_device = s390_iommu_release_device,
>  	.device_group = generic_device_group,
>  	.pgsize_bitmap = SZ_4K,
>  	.get_resv_regions = s390_iommu_get_resv_regions,

Thanks for taking care of the suggestions I agree these are all useful
improvements over my original patch.

Reviewed-by: Niklas Schnelle <schnelle@...ux.ibm.com>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ