[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220314194451.58266-19-mjrosato@linux.ibm.com>
Date: Mon, 14 Mar 2022 15:44:37 -0400
From: Matthew Rosato <mjrosato@...ux.ibm.com>
To: linux-s390@...r.kernel.org
Cc: alex.williamson@...hat.com, cohuck@...hat.com,
schnelle@...ux.ibm.com, farman@...ux.ibm.com, pmorel@...ux.ibm.com,
borntraeger@...ux.ibm.com, hca@...ux.ibm.com, gor@...ux.ibm.com,
gerald.schaefer@...ux.ibm.com, agordeev@...ux.ibm.com,
svens@...ux.ibm.com, frankja@...ux.ibm.com, david@...hat.com,
imbrenda@...ux.ibm.com, vneethv@...ux.ibm.com,
oberpar@...ux.ibm.com, freude@...ux.ibm.com, thuth@...hat.com,
pasic@...ux.ibm.com, joro@...tes.org, will@...nel.org,
pbonzini@...hat.com, corbet@....net, jgg@...dia.com,
kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
iommu@...ts.linux-foundation.org, linux-doc@...r.kernel.org
Subject: [PATCH v4 18/32] iommu/s390: add support for IOMMU_DOMAIN_KVM
Add an alternate set of domain ops for type IOMMU_DOMAIN_KVM. This type is
intended for use when KVM is managing the IOMMU domain on behalf of a
VM. Mapping can only be performed once both a KVM and a guest IOTA
(address translation anchor) have been registered with the domain.
The map operation is expected to be invoked in response to an
04 intercept of a guest RPCIT instruction, and will synchronize the
host and guest DMA tables over the range specified.
Signed-off-by: Matthew Rosato <mjrosato@...ux.ibm.com>
---
arch/s390/include/asm/kvm_pci.h | 6 +
arch/s390/include/asm/pci_dma.h | 3 +
drivers/iommu/Kconfig | 8 +
drivers/iommu/Makefile | 1 +
drivers/iommu/s390-iommu.c | 49 ++--
drivers/iommu/s390-iommu.h | 53 ++++
drivers/iommu/s390-kvm-iommu.c | 469 ++++++++++++++++++++++++++++++++
7 files changed, 562 insertions(+), 27 deletions(-)
create mode 100644 drivers/iommu/s390-iommu.h
create mode 100644 drivers/iommu/s390-kvm-iommu.c
diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
index ae8669105f72..ebc0da5d9ac1 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -11,6 +11,7 @@
#define ASM_KVM_PCI_H
#include <linux/types.h>
+#include <linux/iommu.h>
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
@@ -19,9 +20,14 @@
struct kvm_zdev {
struct zpci_dev *zdev;
struct kvm *kvm;
+ struct iommu_domain *dom; /* Used to invoke IOMMU API for RPCIT */
};
int kvm_s390_pci_dev_open(struct zpci_dev *zdev);
void kvm_s390_pci_dev_release(struct zpci_dev *zdev);
+int zpci_iommu_attach_kvm(struct zpci_dev *zdev, struct kvm *kvm);
+int zpci_iommu_kvm_assign_iota(struct zpci_dev *zdev, u64 iota);
+int zpci_iommu_kvm_remove_iota(struct zpci_dev *zdev);
+
#endif /* ASM_KVM_PCI_H */
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 91e63426bdc5..38004e0a4383 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -50,6 +50,9 @@ enum zpci_ioat_dtype {
#define ZPCI_TABLE_ALIGN ZPCI_TABLE_SIZE
#define ZPCI_TABLE_ENTRY_SIZE (sizeof(unsigned long))
#define ZPCI_TABLE_ENTRIES (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_TABLE_PAGES (ZPCI_TABLE_SIZE >> PAGE_SHIFT)
+#define ZPCI_TABLE_ENTRIES_PAGES (ZPCI_TABLE_ENTRIES * ZPCI_TABLE_PAGES)
+#define ZPCI_TABLE_ENTRIES_PER_PAGE (ZPCI_TABLE_ENTRIES / ZPCI_TABLE_PAGES)
#define ZPCI_TABLE_BITS 11
#define ZPCI_PT_BITS 8
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 3eb68fa1b8cc..9637f73925ec 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -411,6 +411,14 @@ config S390_AP_IOMMU
Enables bits of IOMMU API required by VFIO. The iommu_ops
is not implemented as it is not necessary for VFIO.
+config S390_KVM_IOMMU
+	bool "S390 KVM IOMMU Support"
+	# No COMPILE_TEST: s390-kvm-iommu.c includes s390-only asm headers.
+	depends on S390_IOMMU && KVM
+	select IOMMU_API
+	help
+	  Extends the S390 IOMMU API to support a domain owned and managed by
+	  KVM. This allows KVM, rather than userspace, to manage nested mappings.
+
config MTK_IOMMU
tristate "MediaTek IOMMU Support"
depends on ARCH_MEDIATEK || COMPILE_TEST
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index bc7f730edbb0..5476e978d7f5 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
+obj-$(CONFIG_S390_KVM_IOMMU) += s390-kvm-iommu.o
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 73a85c599dc2..0ead37f6e232 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -11,6 +11,7 @@
#include <linux/iommu-helper.h>
#include <linux/sizes.h>
#include <asm/pci_dma.h>
+#include "s390-iommu.h"
/*
* Physically contiguous memory regions can be mapped with 4 KiB alignment,
@@ -21,24 +22,6 @@
static const struct iommu_ops s390_iommu_ops;
-struct s390_domain {
- struct iommu_domain domain;
- struct list_head devices;
- unsigned long *dma_table;
- spinlock_t dma_table_lock;
- spinlock_t list_lock;
-};
-
-struct s390_domain_device {
- struct list_head list;
- struct zpci_dev *zdev;
-};
-
-static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
-{
- return container_of(dom, struct s390_domain, domain);
-}
-
static bool s390_iommu_capable(enum iommu_cap cap)
{
switch (cap) {
@@ -55,7 +38,12 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
{
struct s390_domain *s390_domain;
- if (domain_type != IOMMU_DOMAIN_UNMANAGED)
+ if (domain_type != IOMMU_DOMAIN_UNMANAGED &&
+ domain_type != IOMMU_DOMAIN_KVM)
+ return NULL;
+
+ if (domain_type == IOMMU_DOMAIN_KVM &&
+ !IS_ENABLED(CONFIG_S390_KVM_IOMMU))
return NULL;
s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
@@ -68,23 +56,30 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
return NULL;
}
+ /* If KVM-managed, swap in alternate ops now */
+ if (IS_ENABLED(CONFIG_S390_KVM_IOMMU) &&
+ domain_type == IOMMU_DOMAIN_KVM)
+ s390_domain->domain.ops = &s390_kvm_domain_ops;
+
spin_lock_init(&s390_domain->dma_table_lock);
spin_lock_init(&s390_domain->list_lock);
+ mutex_init(&s390_domain->kvm_dom.ioat_lock);
INIT_LIST_HEAD(&s390_domain->devices);
return &s390_domain->domain;
}
-static void s390_domain_free(struct iommu_domain *domain)
+void s390_domain_free(struct iommu_domain *domain)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
dma_cleanup_tables(s390_domain->dma_table);
+ mutex_destroy(&s390_domain->kvm_dom.ioat_lock);
kfree(s390_domain);
}
-static int s390_iommu_attach_device(struct iommu_domain *domain,
- struct device *dev)
+int s390_iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev = to_zpci_dev(dev);
@@ -143,8 +138,8 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
return rc;
}
-static void s390_iommu_detach_device(struct iommu_domain *domain,
- struct device *dev)
+void s390_iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev = to_zpci_dev(dev);
@@ -200,7 +195,7 @@ static void s390_iommu_release_device(struct device *dev)
if (zdev && zdev->s390_domain) {
domain = iommu_get_domain_for_dev(dev);
if (domain)
- s390_iommu_detach_device(domain, dev);
+ domain->ops->detach_dev(domain, dev);
}
}
@@ -282,8 +277,8 @@ static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
return rc;
}
-static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
- dma_addr_t iova)
+phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
unsigned long *sto, *pto, *rto, flags;
diff --git a/drivers/iommu/s390-iommu.h b/drivers/iommu/s390-iommu.h
new file mode 100644
index 000000000000..21c8243a36b1
--- /dev/null
+++ b/drivers/iommu/s390-iommu.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * IOMMU API for s390 PCI devices
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Matthew Rosato <mjrosato@...ux.ibm.com>
+ */
+
+#ifndef _S390_IOMMU_H
+#define _S390_IOMMU_H
+
+#include <linux/iommu.h>
+#include <linux/kvm_host.h>
+
+extern const struct iommu_domain_ops s390_kvm_domain_ops;
+
+struct s390_kvm_domain { /* KVM-specific state of an IOMMU_DOMAIN_KVM domain */
+ struct kvm *kvm; /* set by zpci_iommu_attach_kvm(); NULL until then */
+ unsigned long *head[ZPCI_TABLE_PAGES]; /* pinned pages of the guest region table */
+ unsigned long **seg; /* pinned guest segment-table pages, ZPCI_TABLE_ENTRIES_PAGES slots */
+ unsigned long ***pt; /* per region-table index: array of pinned guest page tables */
+ struct page *(*pin)(struct kvm *kvm, gfn_t gfn); /* gfn_to_page, via symbol_get() */
+ void (*unpin)(kvm_pfn_t pfn); /* kvm_release_pfn_dirty, via symbol_get() */
+ struct mutex ioat_lock; /* held while shadowing, assigning or clearing the tables */
+ bool map_enabled; /* true while a guest IOTA is registered */
+};
+
+struct s390_domain { /* s390 wrapper around a generic iommu_domain */
+ struct iommu_domain domain; /* embedded; see to_s390_domain() */
+ struct list_head devices; /* list of struct s390_domain_device */
+ unsigned long *dma_table; /* host DMA translation table */
+ spinlock_t dma_table_lock;
+ spinlock_t list_lock; /* taken around walks of the devices list */
+ struct s390_kvm_domain kvm_dom; /* used by the IOMMU_DOMAIN_KVM ops */
+};
+
+struct s390_domain_device { /* one device attached to an s390_domain */
+ struct list_head list; /* link in s390_domain.devices */
+ struct zpci_dev *zdev; /* the attached zPCI device */
+};
+
+/* Convert an embedded iommu_domain pointer back to its enclosing s390_domain. */
+static inline struct s390_domain *to_s390_domain(struct iommu_domain *dom)
+{
+	struct s390_domain *s390_domain;
+
+	s390_domain = container_of(dom, struct s390_domain, domain);
+	return s390_domain;
+}
+
+void s390_domain_free(struct iommu_domain *domain);
+int s390_iommu_attach_device(struct iommu_domain *domain, struct device *dev);
+void s390_iommu_detach_device(struct iommu_domain *domain, struct device *dev);
+phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova);
+
+#endif /* _S390_IOMMU_H */
diff --git a/drivers/iommu/s390-kvm-iommu.c b/drivers/iommu/s390-kvm-iommu.c
new file mode 100644
index 000000000000..d24e6904d5f8
--- /dev/null
+++ b/drivers/iommu/s390-kvm-iommu.c
@@ -0,0 +1,469 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IOMMU API domain ops for s390 PCI devices using KVM passthrough
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Matthew Rosato <mjrosato@...ux.ibm.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/iommu-helper.h>
+#include <linux/sizes.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_pci.h>
+#include <asm/pci_dma.h>
+#include "s390-iommu.h"
+
+const struct iommu_domain_ops s390_kvm_domain_ops;
+
+/*
+ * Bring one host page-table entry in line with the matching guest entry.
+ *
+ * A valid guest entry is pinned through kvm_dom->pin and translated to a
+ * host physical address. The host entry is then created, replaced, left
+ * alone (duplicate) or invalidated as needed, and any page reference that
+ * is no longer required is dropped through kvm_dom->unpin.
+ *
+ * Returns 1 if a previously valid host entry was replaced or invalidated,
+ * 0 if not, and -EIO if the guest page could not be pinned.
+ */
+static int dma_shadow_cpu_trans(struct s390_kvm_domain *kvm_dom,
+				unsigned long *entry, unsigned long *gentry)
+{
+	phys_addr_t guest_pa = 0;
+	kvm_pfn_t old_pfn;
+
+	if (pt_entry_isvalid(*gentry)) {
+		gpa_t gpa = *gentry & ZPCI_PTE_ADDR_MASK;
+		struct kvm *kvm = kvm_dom->kvm;
+		unsigned long srcu_idx;
+		struct page *pinned;
+
+		/* pin and validate */
+		srcu_idx = srcu_read_lock(&kvm->srcu);
+		pinned = kvm_dom->pin(kvm, gpa_to_gfn(gpa));
+		srcu_read_unlock(&kvm->srcu, srcu_idx);
+		if (is_error_page(pinned))
+			return -EIO;
+		guest_pa = page_to_phys(pinned) + (gpa & ~PAGE_MASK);
+	}
+
+	if (!pt_entry_isvalid(*entry)) {
+		/* Host entry unused so far: install a new one if the guest has one */
+		if (guest_pa != 0) {
+			set_pt_pfaa(entry, guest_pa);
+			validate_pt_entry(entry);
+		}
+		return 0;
+	}
+
+	old_pfn = *entry >> PAGE_SHIFT;
+
+	if (guest_pa != 0 && (*entry & ZPCI_PTE_ADDR_MASK) == guest_pa) {
+		/* Duplicate: drop the extra reference taken by the pin above */
+		kvm_dom->unpin(old_pfn);
+		return 0;
+	}
+
+	/* Replace (guest entry valid) or plain invalidate (guest entry gone) */
+	invalidate_pt_entry(entry);
+	if (guest_pa != 0) {
+		set_pt_pfaa(entry, guest_pa);
+		validate_pt_entry(entry);
+	}
+	kvm_dom->unpin(old_pfn);
+
+	return 1;
+}
+
+/*
+ * Walk the guest DMA translation tables for dma_addr and return a pointer to
+ * the guest page-table entry, pinning guest table pages on first use.
+ *
+ * kvm_dom->head[] supplies the guest region table, kvm_dom->seg[] caches the
+ * pinned pages of each guest segment table and kvm_dom->pt[][] caches each
+ * pinned guest page table.
+ *
+ * Returns NULL if a guest table entry on the path is invalid, a guest page
+ * cannot be pinned, or the page-table pointer array cannot be allocated.
+ */
+static unsigned long *dma_walk_guest_cpu_trans(struct s390_kvm_domain *kvm_dom,
+					       dma_addr_t dma_addr)
+{
+	unsigned long *rto, *sto, *pto;
+	unsigned int rtx, rts, sx, px;
+	struct page *page;
+	gpa_t addr;
+	int i, idx;
+
+	/* Pin guest segment table if needed */
+	rtx = calc_rtx(dma_addr);
+	rto = kvm_dom->head[(rtx / ZPCI_TABLE_ENTRIES_PER_PAGE)];
+	rts = rtx * ZPCI_TABLE_PAGES;
+	if (!kvm_dom->seg[rts]) {
+		if (!reg_entry_isvalid(rto[rtx % ZPCI_TABLE_ENTRIES_PER_PAGE]))
+			return NULL;
+		sto = get_rt_sto(rto[rtx % ZPCI_TABLE_ENTRIES_PER_PAGE]);
+		addr = ((u64)sto & ZPCI_RTE_ADDR_MASK);
+		idx = srcu_read_lock(&kvm_dom->kvm->srcu);
+		for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+			page = kvm_dom->pin(kvm_dom->kvm, gpa_to_gfn(addr));
+			if (is_error_page(page))
+				break;
+			kvm_dom->seg[rts + i] = (page_to_virt(page) +
+						 (addr & ~PAGE_MASK));
+			addr += PAGE_SIZE;
+		}
+		srcu_read_unlock(&kvm_dom->kvm->srcu, idx);
+
+		if (i < ZPCI_TABLE_PAGES) {
+			/*
+			 * Partial pin: undo it completely. Leaving seg[rts]
+			 * set would make the next walk skip pinning and then
+			 * use the NULL slots that follow.
+			 */
+			while (i-- > 0) {
+				kvm_dom->unpin((u64)kvm_dom->seg[rts + i] >>
+					       PAGE_SHIFT);
+				kvm_dom->seg[rts + i] = NULL;
+			}
+			return NULL;
+		}
+	}
+
+	/* Allocate pin pointers for another segment table if needed */
+	if (!kvm_dom->pt[rtx]) {
+		kvm_dom->pt[rtx] = kcalloc(ZPCI_TABLE_ENTRIES,
+					   (sizeof(unsigned long *)),
+					   GFP_KERNEL);
+		if (!kvm_dom->pt[rtx])
+			return NULL;
+	}
+
+	/* Pin guest page table if needed */
+	sx = calc_sx(dma_addr);
+	sto = kvm_dom->seg[(rts + (sx / ZPCI_TABLE_ENTRIES_PER_PAGE))];
+	if (!kvm_dom->pt[rtx][sx]) {
+		if (!reg_entry_isvalid(sto[sx % ZPCI_TABLE_ENTRIES_PER_PAGE]))
+			return NULL;
+		pto = get_st_pto(sto[sx % ZPCI_TABLE_ENTRIES_PER_PAGE]);
+		if (!pto)
+			return NULL;
+		addr = ((u64)pto & ZPCI_STE_ADDR_MASK);
+		idx = srcu_read_lock(&kvm_dom->kvm->srcu);
+		page = kvm_dom->pin(kvm_dom->kvm, gpa_to_gfn(addr));
+		srcu_read_unlock(&kvm_dom->kvm->srcu, idx);
+		if (is_error_page(page))
+			return NULL;
+		kvm_dom->pt[rtx][sx] = page_to_virt(page) + (addr & ~PAGE_MASK);
+	}
+	pto = kvm_dom->pt[rtx][sx];
+
+	/* Return guest PTE */
+	px = calc_px(dma_addr);
+	return &pto[px];
+}
+
+/*
+ * Synchronize the host DMA table with the guest DMA table for nr_pages pages
+ * starting at dma_addr.
+ *
+ * Pages for which no guest page-table entry can be found are skipped.
+ * *mapped_pages is set to the number of pages processed, including skipped
+ * ones, and stays valid when an error is returned part-way through.
+ *
+ * Returns the number of previously valid host entries that were replaced or
+ * invalidated, -ENOMEM if the host table walk fails, or -EIO if an entry
+ * could not be shadowed.
+ */
+static int dma_table_shadow(struct s390_domain *s390_domain,
+			    dma_addr_t dma_addr, size_t nr_pages,
+			    size_t *mapped_pages)
+{
+	struct s390_kvm_domain *kvm_dom = &s390_domain->kvm_dom;
+	unsigned long *entry, *gentry;
+	int rc = 0, rc2;
+
+	/*
+	 * Advance dma_addr in the loop header so that a skipped page still
+	 * moves on to the next address.
+	 */
+	for (*mapped_pages = 0; *mapped_pages < nr_pages;
+	     (*mapped_pages)++, dma_addr += PAGE_SIZE) {
+		gentry = dma_walk_guest_cpu_trans(kvm_dom, dma_addr);
+		if (!gentry)
+			continue;
+
+		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+		if (!entry)
+			return -ENOMEM;
+
+		rc2 = dma_shadow_cpu_trans(kvm_dom, entry, gentry);
+		if (rc2 < 0)
+			return -EIO;
+
+		rc += rc2;
+	}
+
+	return rc;
+}
+
+/*
+ * Shadow the guest DMA tables over a range and, if any previously valid host
+ * entry changed, refresh the translations of every device in the domain.
+ *
+ * The whole operation runs under kvm_dom.ioat_lock. If mapped is non-NULL it
+ * receives the number of bytes processed, also on failure.
+ *
+ * Returns 0 on success, a negative error from dma_table_shadow(), or
+ * -EINVAL / -EIO if a translation refresh fails (status zero / non-zero).
+ * All devices are refreshed even after a failure; the first error is kept.
+ */
+static int s390_kvm_iommu_update_trans(struct s390_domain *s390_domain,
+				       dma_addr_t dma_addr, size_t nr_pages,
+				       size_t *mapped)
+{
+	struct s390_domain_device *domain_device;
+	unsigned long irq_flags;
+	size_t mapped_pages = 0;
+	int rc, refresh_rc;
+	u8 status;
+
+	mutex_lock(&s390_domain->kvm_dom.ioat_lock);
+	rc = dma_table_shadow(s390_domain, dma_addr, nr_pages, &mapped_pages);
+
+	/* If error or no changed mappings, leave immediately without refresh */
+	if (rc <= 0)
+		goto exit;
+
+	/* The positive count only signals "refresh needed"; do not return it */
+	rc = 0;
+
+	spin_lock_irqsave(&s390_domain->list_lock, irq_flags);
+	list_for_each_entry(domain_device, &s390_domain->devices, list) {
+		refresh_rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32,
+						dma_addr, nr_pages * PAGE_SIZE,
+						&status);
+		/* Keep the first failure; a later success must not hide it */
+		if (refresh_rc && !rc)
+			rc = status ? -EIO : -EINVAL;
+	}
+	spin_unlock_irqrestore(&s390_domain->list_lock, irq_flags);
+
+exit:
+	if (mapped)
+		*mapped = mapped_pages << PAGE_SHIFT;
+
+	mutex_unlock(&s390_domain->kvm_dom.ioat_lock);
+	return rc;
+}
+
+/*
+ * .map op for KVM-managed domains: synchronize host and guest DMA tables over
+ * [iova, iova + size). paddr and gfp are not used.
+ *
+ * Returns -EINVAL if prot requests neither read nor write, if no guest IOTA
+ * is registered, or if size is zero; otherwise the result of
+ * s390_kvm_iommu_update_trans().
+ */
+static int s390_kvm_iommu_map(struct iommu_domain *domain, unsigned long iova,
+			      phys_addr_t paddr, size_t size, int prot,
+			      gfp_t gfp)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	size_t nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	/* Mapping additionally requires a registered guest IOTA */
+	if (!(prot & (IOMMU_READ | IOMMU_WRITE)) ||
+	    !s390_domain->kvm_dom.map_enabled ||
+	    !nr_pages)
+		return -EINVAL;
+
+	return s390_kvm_iommu_update_trans(s390_domain, iova, nr_pages, NULL);
+}
+
+/*
+ * .map_pages op for KVM-managed domains: synchronize host and guest DMA
+ * tables over pgcount pages of pgsize bytes starting at iova, reporting the
+ * number of bytes processed through *mapped. paddr and gfp are not used.
+ *
+ * Returns -EINVAL if prot requests neither read nor write, if no guest IOTA
+ * is registered, or if the range is empty; otherwise the result of
+ * s390_kvm_iommu_update_trans().
+ */
+static int s390_kvm_iommu_map_pages(struct iommu_domain *domain,
+				    unsigned long iova, phys_addr_t paddr,
+				    size_t pgsize, size_t pgcount, int prot,
+				    gfp_t gfp, size_t *mapped)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	size_t nr_pages = pgcount * (pgsize / PAGE_SIZE);
+
+	/* Mapping additionally requires a registered guest IOTA */
+	if (!(prot & (IOMMU_READ | IOMMU_WRITE)) ||
+	    !s390_domain->kvm_dom.map_enabled ||
+	    !nr_pages)
+		return -EINVAL;
+
+	return s390_kvm_iommu_update_trans(s390_domain, iova, nr_pages, mapped);
+}
+
+/*
+ * Unpin the guest page table cached at kvm_dom->pt[st][pt], if any, and
+ * clear the slot.
+ *
+ * kvm_dom->unpin() takes a pfn, while the slot holds a kernel address, so
+ * the address is shifted by PAGE_SHIFT exactly as the other unpin call
+ * sites in this file do.
+ * NOTE(review): this relies on the kernel address equalling the physical
+ * address; confirm, or switch all call sites to virt_to_pfn().
+ */
+static void free_pt_entry(struct s390_kvm_domain *kvm_dom, int st, int pt)
+{
+	if (!kvm_dom->pt[st][pt])
+		return;
+
+	kvm_dom->unpin((u64)kvm_dom->pt[st][pt] >> PAGE_SHIFT);
+	kvm_dom->pt[st][pt] = NULL;
+}
+
+/*
+ * Release one guest segment table: unpin its ZPCI_TABLE_PAGES pages starting
+ * at kvm_dom->seg[entry], then unpin every guest page table cached beneath
+ * it and free the pointer array that tracked them.
+ *
+ * Nothing below the segment table is touched if none of its pages was
+ * pinned. Slots are cleared as they are released.
+ */
+static void free_seg_entry(struct s390_kvm_domain *kvm_dom, int entry)
+{
+	int i, st, count = 0;
+
+	for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+		if (kvm_dom->seg[entry + i]) {
+			/* unpin() takes a pfn, not an address */
+			kvm_dom->unpin((u64)kvm_dom->seg[entry + i] >>
+				       PAGE_SHIFT);
+			kvm_dom->seg[entry + i] = NULL;
+			count++;
+		}
+	}
+
+	if (count == 0)
+		return;
+
+	st = entry / ZPCI_TABLE_PAGES;
+
+	/*
+	 * The segment table can be pinned without a page-table pointer array
+	 * if that array's allocation failed in dma_walk_guest_cpu_trans().
+	 */
+	if (!kvm_dom->pt[st])
+		return;
+
+	for (i = 0; i < ZPCI_TABLE_ENTRIES; i++)
+		free_pt_entry(kvm_dom, st, i);
+	kfree(kvm_dom->pt[st]);
+	kvm_dom->pt[st] = NULL;
+}
+
+/*
+ * Tear down all shadow state of a KVM-managed domain: invalidate and unpin
+ * every valid host entry inside the domain aperture, unpin the guest region,
+ * segment and page tables, and free the tracking arrays.
+ *
+ * The state check, the teardown and the clearing of map_enabled all happen
+ * under kvm_dom.ioat_lock.
+ *
+ * Returns 0 on success or -EINVAL if no KVM is attached or no guest IOTA is
+ * registered.
+ */
+static int s390_kvm_clear_ioat_tables(struct s390_domain *s390_domain)
+{
+	struct s390_kvm_domain *kvm_dom = &s390_domain->kvm_dom;
+	unsigned long *entry;
+	dma_addr_t dma_addr;
+	kvm_pfn_t pfn;
+	int i, rc = 0;
+
+	mutex_lock(&kvm_dom->ioat_lock);
+
+	if (!kvm_dom->kvm || !kvm_dom->map_enabled) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* Stop new map requests before the tables start to disappear */
+	kvm_dom->map_enabled = false;
+
+	/* Invalidate and unpin remaining guest pages */
+	for (dma_addr = s390_domain->domain.geometry.aperture_start;
+	     dma_addr < s390_domain->domain.geometry.aperture_end;
+	     dma_addr += PAGE_SIZE) {
+		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+		if (entry && pt_entry_isvalid(*entry)) {
+			pfn = (*entry >> PAGE_SHIFT);
+			invalidate_pt_entry(entry);
+			kvm_dom->unpin(pfn);
+		}
+	}
+
+	/* Unpin all shadow tables */
+	for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+		kvm_dom->unpin((u64)kvm_dom->head[i] >> PAGE_SHIFT);
+		kvm_dom->head[i] = NULL;
+	}
+
+	for (i = 0; i < ZPCI_TABLE_ENTRIES_PAGES; i += ZPCI_TABLE_PAGES)
+		free_seg_entry(kvm_dom, i);
+
+	/* Do not leave dangling pointers behind for a later re-registration */
+	kfree(kvm_dom->seg);
+	kvm_dom->seg = NULL;
+	kfree(kvm_dom->pt);
+	kvm_dom->pt = NULL;
+
+out:
+	mutex_unlock(&kvm_dom->ioat_lock);
+	return rc;
+}
+
+/*
+ * .free op for KVM-managed domains: drop any remaining shadow state, release
+ * the KVM symbol references taken when a KVM was attached, then free the
+ * domain itself.
+ */
+static void s390_kvm_domain_free(struct iommu_domain *domain)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	struct s390_kvm_domain *kvm_dom = &s390_domain->kvm_dom;
+
+	/* Fails with -EINVAL if no IOTA is registered; nothing to clear then */
+	s390_kvm_clear_ioat_tables(s390_domain);
+
+	if (kvm_dom->kvm) {
+		symbol_put(gfn_to_page);
+		symbol_put(kvm_release_pfn_dirty);
+	}
+
+	s390_domain_free(domain);
+}
+
+/*
+ * Associate a KVM with the IOMMU_DOMAIN_KVM domain that zdev is attached to.
+ *
+ * Every device in the domain must already belong to the given KVM. On
+ * success each device's kzdev->dom points at the domain and the domain holds
+ * references to gfn_to_page and kvm_release_pfn_dirty for pinning and
+ * unpinning guest pages. On failure nothing is modified.
+ *
+ * Returns 0 on success or -EINVAL if zdev has no domain, the domain is not
+ * of type IOMMU_DOMAIN_KVM, a KVM is already attached, a device belongs to
+ * a different KVM, or a required KVM symbol is unavailable.
+ */
+int zpci_iommu_attach_kvm(struct zpci_dev *zdev, struct kvm *kvm)
+{
+	struct s390_domain *s390_domain = zdev->s390_domain;
+	struct s390_domain_device *domain_device;
+	struct s390_kvm_domain *kvm_dom;
+	struct iommu_domain *domain;
+	unsigned long flags;
+	int rc = 0;
+
+	if (!s390_domain)
+		return -EINVAL;
+
+	domain = &s390_domain->domain;
+	kvm_dom = &s390_domain->kvm_dom;
+
+	if (domain->type != IOMMU_DOMAIN_KVM)
+		return -EINVAL;
+
+	if (kvm_dom->kvm)
+		return -EINVAL;
+
+	/* Take the symbol references first so that a failure has no side effects */
+	kvm_dom->pin = symbol_get(gfn_to_page);
+	if (!kvm_dom->pin)
+		return -EINVAL;
+
+	kvm_dom->unpin = symbol_get(kvm_release_pfn_dirty);
+	if (!kvm_dom->unpin) {
+		rc = -EINVAL;
+		goto put_pin;
+	}
+
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
+	/* Validate every device before touching any of them */
+	list_for_each_entry(domain_device, &s390_domain->devices, list) {
+		if (domain_device->zdev->kzdev->kvm != kvm) {
+			rc = -EINVAL;
+			break;
+		}
+	}
+	if (!rc) {
+		list_for_each_entry(domain_device, &s390_domain->devices, list)
+			domain_device->zdev->kzdev->dom = domain;
+	}
+	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+
+	if (rc)
+		goto put_unpin;
+
+	kvm_dom->kvm = kvm;
+	return 0;
+
+put_unpin:
+	symbol_put(kvm_release_pfn_dirty);
+	kvm_dom->unpin = NULL;
+put_pin:
+	symbol_put(gfn_to_page);
+	kvm_dom->pin = NULL;
+	return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_iommu_attach_kvm);
+
+/*
+ * Register a guest IOTA with the KVM-managed domain of zdev.
+ *
+ * The ZPCI_TABLE_PAGES pages of the guest region table addressed by iota are
+ * pinned and recorded in kvm_dom->head[], the arrays that track pinned guest
+ * segment and page tables are allocated, and mapping is enabled.
+ *
+ * Returns 0 on success, -EINVAL if no KVM is attached, an IOTA is already
+ * registered or iota lacks ZPCI_IOTA_RTTO_FLAG, -EIO if a guest page cannot
+ * be pinned, or -ENOMEM if an allocation fails. On failure every page pinned
+ * by this call is released again.
+ */
+int zpci_iommu_kvm_assign_iota(struct zpci_dev *zdev, u64 iota)
+{
+	struct s390_domain *s390_domain = zdev->s390_domain;
+	struct s390_kvm_domain *kvm_dom = &s390_domain->kvm_dom;
+	gpa_t gpa = (gpa_t)(iota & ZPCI_RTE_ADDR_MASK);
+	struct page *page;
+	struct kvm *kvm;
+	void *iaddr;
+	int i, idx, npinned;
+	int rc = -EIO;
+
+	/* Ensure KVM associated and IOTA not already registered */
+	if (!kvm_dom->kvm || kvm_dom->map_enabled)
+		return -EINVAL;
+
+	/* Ensure supported type specified */
+	if ((iota & ZPCI_IOTA_RTTO_FLAG) != ZPCI_IOTA_RTTO_FLAG)
+		return -EINVAL;
+
+	kvm = kvm_dom->kvm;
+	mutex_lock(&kvm_dom->ioat_lock);
+
+	idx = srcu_read_lock(&kvm->srcu);
+	for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+		page = kvm_dom->pin(kvm, gpa_to_gfn(gpa));
+		if (is_error_page(page))
+			break;
+		iaddr = page_to_virt(page) + (gpa & ~PAGE_MASK);
+		kvm_dom->head[i] = (unsigned long *)iaddr;
+		gpa += PAGE_SIZE;
+	}
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	/* Remember how many pages are pinned so only those get released */
+	npinned = i;
+	if (npinned < ZPCI_TABLE_PAGES)
+		goto unpin;
+
+	kvm_dom->seg = kcalloc(ZPCI_TABLE_ENTRIES_PAGES,
+			       sizeof(unsigned long *), GFP_KERNEL);
+	if (!kvm_dom->seg) {
+		rc = -ENOMEM;
+		goto unpin;
+	}
+
+	kvm_dom->pt = kcalloc(ZPCI_TABLE_ENTRIES, sizeof(unsigned long **),
+			      GFP_KERNEL);
+	if (!kvm_dom->pt) {
+		rc = -ENOMEM;
+		goto free_seg;
+	}
+
+	kvm_dom->map_enabled = true;
+	mutex_unlock(&kvm_dom->ioat_lock);
+	return 0;
+
+free_seg:
+	kfree(kvm_dom->seg);
+	kvm_dom->seg = NULL;
+unpin:
+	while (npinned-- > 0) {
+		kvm_dom->unpin((u64)kvm_dom->head[npinned] >> PAGE_SHIFT);
+		kvm_dom->head[npinned] = NULL;
+	}
+	mutex_unlock(&kvm_dom->ioat_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_iommu_kvm_assign_iota);
+
+/*
+ * Unregister the guest IOTA from the domain zdev is attached to by clearing
+ * all of its shadow tables; returns what s390_kvm_clear_ioat_tables() returns.
+ */
+int zpci_iommu_kvm_remove_iota(struct zpci_dev *zdev)
+{
+	return s390_kvm_clear_ioat_tables(zdev->s390_domain);
+}
+EXPORT_SYMBOL_GPL(zpci_iommu_kvm_remove_iota);
+
+/*
+ * Domain ops installed for IOMMU_DOMAIN_KVM domains.
+ *
+ * Attach, detach and iova_to_phys are shared with the regular s390 domain
+ * ops. There are deliberately no unmap ops: every mapping and unmapping is
+ * driven through the map ops. A map over a given range synchronizes the host
+ * and guest DMA tables, performing whatever mappings / unmappings are needed
+ * to bring the two table states in line.
+ * Partial mapping failures do not require a rewind; the guest will receive
+ * an indication that will trigger a global refresh of the tables.
+ */
+const struct iommu_domain_ops s390_kvm_domain_ops = {
+	.attach_dev	= s390_iommu_attach_device,
+	.detach_dev	= s390_iommu_detach_device,
+	.iova_to_phys	= s390_iommu_iova_to_phys,
+	.free		= s390_kvm_domain_free,
+	.map		= s390_kvm_iommu_map,
+	.map_pages	= s390_kvm_iommu_map_pages,
+	.unmap		= NULL,
+	.unmap_pages	= NULL,
+};
--
2.27.0
Powered by blists - more mailing lists