[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <787fd89b-fbc0-4fd5-a1af-63dfddf13435@redhat.com>
Date: Thu, 23 Jan 2025 18:10:47 +0100
From: Eric Auger <eric.auger@...hat.com>
To: Nicolin Chen <nicolinc@...dia.com>, will@...nel.org,
robin.murphy@....com, jgg@...dia.com, kevin.tian@...el.com,
tglx@...utronix.de, maz@...nel.org, alex.williamson@...hat.com
Cc: joro@...tes.org, shuah@...nel.org, reinette.chatre@...el.com,
yebin10@...wei.com, apatel@...tanamicro.com,
shivamurthy.shastri@...utronix.de, bhelgaas@...gle.com,
anna-maria@...utronix.de, yury.norov@...il.com, nipun.gupta@....com,
iommu@...ts.linux.dev, linux-kernel@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org, kvm@...r.kernel.org,
linux-kselftest@...r.kernel.org, patches@...ts.linux.dev,
jean-philippe@...aro.org, mdf@...nel.org, mshavit@...gle.com,
shameerali.kolothum.thodi@...wei.com, smostafa@...gle.com, ddutile@...hat.com
Subject: Re: [PATCH RFCv2 03/13] iommu: Make iommu_dma_prepare_msi() into a
generic operation
Hi,
On 1/11/25 4:32 AM, Nicolin Chen wrote:
> From: Jason Gunthorpe <jgg@...dia.com>
>
> SW_MSI supports IOMMU to translate an MSI message before the MSI message
> is delivered to the interrupt controller. On such systems the iommu_domain
> must have a translation for the MSI message for interrupts to work.
>
> The IRQ subsystem will call into IOMMU to request that a physical page be
> setup to receive MSI message, and the IOMMU then sets an IOVA that maps to
> that physical page. Ultimately the IOVA is programmed into the device via
> the msi_msg.
>
> Generalize this to allow the iommu_domain owner to provide its own
> implementation of this mapping. Add a function pointer to struct
> iommu_domain to allow the domain owner to provide an implementation.
>
> Have dma-iommu supply its implementation for IOMMU_DOMAIN_DMA types during
> the iommu_get_dma_cookie() path. For IOMMU_DOMAIN_UNMANAGED types used by
> VFIO (and iommufd for now), have the same iommu_dma_sw_msi set as well in
> the iommu_get_msi_cookie() path.
>
> Hold the group mutex while in iommu_dma_prepare_msi() to ensure the domain
> doesn't change or become freed while running. Races with IRQ operations
> from VFIO and domain changes from iommufd are possible here.
This was my question in my previous comments.
>
> Rreplace the msi_prepare_lock with a lockdep assertion for the group mutex
Replace
> as documentation. For the dma_iommu.c each iommu_domain unique to a
is?
> group.
>
> Signed-off-by: Jason Gunthorpe <jgg@...dia.com>
> [nicolinc: move iommu_domain_set_sw_msi() from iommu_dma_init_domain() to
> iommu_get_dma_cookie(); add in iommu_put_dma_cookie() an sw_msi test]
> Signed-off-by: Nicolin Chen <nicolinc@...dia.com>
> ---
> include/linux/iommu.h | 44 ++++++++++++++++++++++++++-------------
> drivers/iommu/dma-iommu.c | 33 +++++++++++++----------------
> drivers/iommu/iommu.c | 29 ++++++++++++++++++++++++++
> 3 files changed, 73 insertions(+), 33 deletions(-)
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 3a4215966c1b..423fdfa6b3bb 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -44,6 +44,8 @@ struct iommu_dma_cookie;
> struct iommu_fault_param;
> struct iommufd_ctx;
> struct iommufd_viommu;
> +struct msi_desc;
> +struct msi_msg;
>
> #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */
> #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */
> @@ -216,6 +218,12 @@ struct iommu_domain {
> struct iommu_domain_geometry geometry;
> struct iommu_dma_cookie *iova_cookie;
> int (*iopf_handler)(struct iopf_group *group);
> +
> +#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
> + int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc,
> + phys_addr_t msi_addr);
> +#endif
> +
> void *fault_data;
> union {
> struct {
> @@ -234,6 +242,16 @@ struct iommu_domain {
> };
> };
>
> +static inline void iommu_domain_set_sw_msi(
> + struct iommu_domain *domain,
> + int (*sw_msi)(struct iommu_domain *domain, struct msi_desc *desc,
> + phys_addr_t msi_addr))
> +{
> +#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
> + domain->sw_msi = sw_msi;
> +#endif
> +}
> +
> static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
> {
> return domain->type & __IOMMU_DOMAIN_DMA_API;
> @@ -1475,6 +1493,18 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev)
> static inline void iommu_free_global_pasid(ioasid_t pasid) {}
> #endif /* CONFIG_IOMMU_API */
>
> +#ifdef CONFIG_IRQ_MSI_IOMMU
> +#ifdef CONFIG_IOMMU_API
> +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr);
> +#else
> +static inline int iommu_dma_prepare_msi(struct msi_desc *desc,
> + phys_addr_t msi_addr)
> +{
> + return 0;
> +}
> +#endif /* CONFIG_IOMMU_API */
> +#endif /* CONFIG_IRQ_MSI_IOMMU */
> +
> #if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API)
> void iommu_group_mutex_assert(struct device *dev);
> #else
> @@ -1508,26 +1538,12 @@ static inline void iommu_debugfs_setup(void) {}
> #endif
>
> #ifdef CONFIG_IOMMU_DMA
> -#include <linux/msi.h>
> -
> int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
> -
> -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr);
> -
> #else /* CONFIG_IOMMU_DMA */
> -
> -struct msi_desc;
> -struct msi_msg;
> -
> static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
> {
> return -ENODEV;
> }
> -
> -static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> -{
> - return 0;
> -}
> #endif /* CONFIG_IOMMU_DMA */
>
> /*
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index bf91e014d179..3b58244e6344 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -24,6 +24,7 @@
> #include <linux/memremap.h>
> #include <linux/mm.h>
> #include <linux/mutex.h>
> +#include <linux/msi.h>
> #include <linux/of_iommu.h>
> #include <linux/pci.h>
> #include <linux/scatterlist.h>
> @@ -102,6 +103,9 @@ static int __init iommu_dma_forcedac_setup(char *str)
> }
> early_param("iommu.forcedac", iommu_dma_forcedac_setup);
>
> +static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
> + phys_addr_t msi_addr);
> +
> /* Number of entries per flush queue */
> #define IOVA_DEFAULT_FQ_SIZE 256
> #define IOVA_SINGLE_FQ_SIZE 32768
> @@ -398,6 +402,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
> return -ENOMEM;
>
> mutex_init(&domain->iova_cookie->mutex);
> + iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi);
> return 0;
> }
>
> @@ -429,6 +434,7 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
>
> cookie->msi_iova = base;
> domain->iova_cookie = cookie;
> + iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi);
> return 0;
> }
> EXPORT_SYMBOL(iommu_get_msi_cookie);
> @@ -443,6 +449,9 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
> struct iommu_dma_cookie *cookie = domain->iova_cookie;
> struct iommu_dma_msi_page *msi, *tmp;
>
> + if (domain->sw_msi != iommu_dma_sw_msi)
> + return;
> +
I don't get the above check. The comment says this is also called for a
cookie prepared with iommu_get_dma_cookie(). Don't you need to do some
cleanup for the latter as well?
> if (!cookie)
> return;
>
> @@ -1800,33 +1809,19 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
> return NULL;
> }
>
> -/**
> - * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
> - * @desc: MSI descriptor, will store the MSI page
> - * @msi_addr: MSI target address to be mapped
> - *
> - * Return: 0 on success or negative error code if the mapping failed.
> - */
> -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> +static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
> + phys_addr_t msi_addr)
> {
> struct device *dev = msi_desc_to_dev(desc);
> - struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> - struct iommu_dma_msi_page *msi_page;
> - static DEFINE_MUTEX(msi_prepare_lock); /* see below */
> + const struct iommu_dma_msi_page *msi_page;
>
> - if (!domain || !domain->iova_cookie) {
> + if (!domain->iova_cookie) {
> msi_desc_set_iommu_msi_iova(desc, 0, 0);
> return 0;
> }
>
> - /*
> - * In fact the whole prepare operation should already be serialised by
> - * irq_domain_mutex further up the callchain, but that's pretty subtle
> - * on its own, so consider this locking as failsafe documentation...
> - */
> - mutex_lock(&msi_prepare_lock);
> + iommu_group_mutex_assert(dev);
> msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
> - mutex_unlock(&msi_prepare_lock);
> if (!msi_page)
> return -ENOMEM;
>
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 599030e1e890..fbbbcdba8a4f 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -3587,3 +3587,32 @@ int iommu_replace_group_handle(struct iommu_group *group,
> return ret;
> }
> EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");
> +
> +#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
> +/**
> + * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
> + * @desc: MSI descriptor, will store the MSI page
> + * @msi_addr: MSI target address to be mapped
> + *
> + * The implementation of sw_msi() should take msi_addr and map it to
> + * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the
> + * mapping information.
> + *
> + * Return: 0 on success or negative error code if the mapping failed.
> + */
> +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
> +{
> + struct device *dev = msi_desc_to_dev(desc);
> + struct iommu_group *group = dev->iommu_group;
> + int ret = 0;
> +
> + if (!group)
> + return 0;
> +
> + mutex_lock(&group->mutex);
> + if (group->domain && group->domain->sw_msi)
> + ret = group->domain->sw_msi(group->domain, desc, msi_addr);
> + mutex_unlock(&group->mutex);
> + return ret;
> +}
> +#endif /* CONFIG_IRQ_MSI_IOMMU */
Thanks
Eric
Powered by blists - more mailing lists