[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220504075356.GA2361844@janakin.usersys.redhat.com>
Date: Wed, 4 May 2022 09:53:59 +0200
From: Jan Stancek <jstancek@...hat.com>
To: Lu Baolu <baolu.lu@...ux.intel.com>
Cc: Joerg Roedel <joro@...tes.org>, Jason Gunthorpe <jgg@...dia.com>,
Christoph Hellwig <hch@...radead.org>,
Ben Skeggs <bskeggs@...hat.com>,
Kevin Tian <kevin.tian@...el.com>,
Ashok Raj <ashok.raj@...el.com>, Will Deacon <will@...nel.org>,
Robin Murphy <robin.murphy@....com>,
Alex Williamson <alex.williamson@...hat.com>,
Eric Auger <eric.auger@...hat.com>,
Liu Yi L <yi.l.liu@...el.com>,
Jacob jun Pan <jacob.jun.pan@...el.com>,
David Airlie <airlied@...ux.ie>,
Daniel Vetter <daniel@...ll.ch>,
Thierry Reding <thierry.reding@...il.com>,
Jonathan Hunter <jonathanh@...dia.com>,
iommu@...ts.linux-foundation.org, linux-kernel@...r.kernel.org,
Christoph Hellwig <hch@....de>, jstancek@...hat.com,
bgoncalv@...hat.com
Subject: [bug] NULL pointer deref after 3f6634d997db ("iommu: Use right way
to retrieve iommu_ops")
On Wed, Feb 16, 2022 at 10:52:47AM +0800, Lu Baolu wrote:
>The common iommu_ops is hooked to both device and domain. When a helper
>has both device and domain pointer, the way to get the iommu_ops looks
>messy in iommu core. This sorts out the way to get iommu_ops. The device
>related helpers go through device pointer, while the domain related ones
>go through domain pointer.
>
>Signed-off-by: Lu Baolu <baolu.lu@...ux.intel.com>
>Reviewed-by: Christoph Hellwig <hch@....de>
>Reviewed-by: Jason Gunthorpe <jgg@...dia.com>
>---
> include/linux/iommu.h | 11 ++++++++++
> drivers/iommu/iommu.c | 50 +++++++++++++++++++++----------------------
> 2 files changed, 36 insertions(+), 25 deletions(-)
>
>diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>index 9ffa2e88f3d0..90f1b5e3809d 100644
>--- a/include/linux/iommu.h
>+++ b/include/linux/iommu.h
>@@ -381,6 +381,17 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
> };
> }
>
>+static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
>+{
>+ /*
>+ * Assume that valid ops must be installed if iommu_probe_device()
>+ * has succeeded. The device ops are essentially for internal use
>+ * within the IOMMU subsystem itself, so we should be able to trust
>+ * ourselves not to misuse the helper.
>+ */
>+ return dev->iommu->iommu_dev->ops;
>+}
>+
> #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */
> #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */
> #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */
>diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>index 7cf073c56036..7af0ee670deb 100644
>--- a/drivers/iommu/iommu.c
>+++ b/drivers/iommu/iommu.c
>@@ -323,13 +323,14 @@ int iommu_probe_device(struct device *dev)
>
> void iommu_release_device(struct device *dev)
> {
>- const struct iommu_ops *ops = dev->bus->iommu_ops;
>+ const struct iommu_ops *ops;
>
> if (!dev->iommu)
> return;
>
> iommu_device_unlink(dev->iommu->iommu_dev, dev);
>
>+ ops = dev_iommu_ops(dev);
> ops->release_device(dev);
>
> iommu_group_remove_device(dev);
>@@ -833,8 +834,10 @@ static int iommu_create_device_direct_mappings(struct iommu_group *group,
> static bool iommu_is_attach_deferred(struct iommu_domain *domain,
> struct device *dev)
> {
>- if (domain->ops->is_attach_deferred)
>- return domain->ops->is_attach_deferred(domain, dev);
>+ const struct iommu_ops *ops = dev_iommu_ops(dev);
>+
>+ if (ops->is_attach_deferred)
>+ return ops->is_attach_deferred(domain, dev);
>
> return false;
> }
>@@ -1252,10 +1255,10 @@ int iommu_page_response(struct device *dev,
> struct iommu_fault_event *evt;
> struct iommu_fault_page_request *prm;
> struct dev_iommu *param = dev->iommu;
>+ const struct iommu_ops *ops = dev_iommu_ops(dev);
> bool has_pasid = msg->flags & IOMMU_PAGE_RESP_PASID_VALID;
>- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
>
>- if (!domain || !domain->ops->page_response)
>+ if (!ops->page_response)
> return -ENODEV;
>
> if (!param || !param->fault_param)
>@@ -1296,7 +1299,7 @@ int iommu_page_response(struct device *dev,
> msg->pasid = 0;
> }
>
>- ret = domain->ops->page_response(dev, evt, msg);
>+ ret = ops->page_response(dev, evt, msg);
> list_del(&evt->list);
> kfree(evt);
> break;
>@@ -1521,7 +1524,7 @@ EXPORT_SYMBOL_GPL(fsl_mc_device_group);
>
> static int iommu_get_def_domain_type(struct device *dev)
> {
>- const struct iommu_ops *ops = dev->bus->iommu_ops;
>+ const struct iommu_ops *ops = dev_iommu_ops(dev);
>
> if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
> return IOMMU_DOMAIN_DMA;
>@@ -1580,7 +1583,7 @@ static int iommu_alloc_default_domain(struct iommu_group *group,
> */
> static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
> {
>- const struct iommu_ops *ops = dev->bus->iommu_ops;
>+ const struct iommu_ops *ops = dev_iommu_ops(dev);
> struct iommu_group *group;
> int ret;
>
>@@ -1588,9 +1591,6 @@ static struct iommu_group *iommu_group_get_for_dev(struct device *dev)
> if (group)
> return group;
>
>- if (!ops)
>- return ERR_PTR(-EINVAL);
>-
> group = ops->device_group(dev);
> if (WARN_ON_ONCE(group == NULL))
> return ERR_PTR(-EINVAL);
>@@ -1759,10 +1759,10 @@ static int __iommu_group_dma_attach(struct iommu_group *group)
>
> static int iommu_group_do_probe_finalize(struct device *dev, void *data)
> {
>- struct iommu_domain *domain = data;
>+ const struct iommu_ops *ops = dev_iommu_ops(dev);
>
>- if (domain->ops->probe_finalize)
>- domain->ops->probe_finalize(dev);
>+ if (ops->probe_finalize)
>+ ops->probe_finalize(dev);
>
> return 0;
> }
>@@ -2020,7 +2020,7 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);
>
> int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
> {
>- const struct iommu_ops *ops = domain->ops;
>+ const struct iommu_ops *ops = dev_iommu_ops(dev);
>
> if (ops->is_attach_deferred && ops->is_attach_deferred(domain, dev))
> return __iommu_attach_device(domain, dev);
>@@ -2579,17 +2579,17 @@ EXPORT_SYMBOL_GPL(iommu_set_pgtable_quirks);
>
> void iommu_get_resv_regions(struct device *dev, struct list_head *list)
> {
>- const struct iommu_ops *ops = dev->bus->iommu_ops;
>+ const struct iommu_ops *ops = dev_iommu_ops(dev);
Hi,
I'm getting panics after hunk above was applied in this patch
on ppc64le KVM guest, dev->iommu is NULL.
Can be reproduced with LTP read_all_sys test or just cat on following file:
# cat /sys/kernel/iommu_groups/0/reserved_regions
[20065.322087] BUG: Kernel NULL pointer dereference on read at 0x000000b8
[20065.323563] Faulting instruction address: 0xc000000000cb7c1c
[20065.323625] Oops: Kernel access of bad area, sig: 11 [#1]
[20065.323671] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
[20065.323729] Modules linked in: kvm_pr kvm vfio_iommu_spapr_tce vfio_spapr_eeh vfio vhost_net tap vhost_vsock vhost vhost_iotlb snd_seq_dummy dummy msdos minix binfmt_misc vcan can_raw nfsv3 nfs_acl nfs lockd grace fscache netfs tun brd overlay exfat vfat fat vsock_loopback vmw_vsock_virtio_transport_common vsock can_bcm can veth n_gsm pps_ldisc ppp_synctty mkiss ax25 ppp_async ppp_generic serport slcan slip slhc loop snd_hrtimer snd_seq snd_seq_device snd_timer snd soundcore pcrypt crypto_user n_hdlc tls rfkill sunrpc joydev virtio_net net_failover failover virtio_console virtio_balloon crct10dif_vpmsum fuse zram xfs virtio_blk vmx_crypto crc32c_vpmsum ipmi_devintf ipmi_msghandler [last unloaded: vcan]
[20065.324308] CPU: 3 PID: 119528 Comm: read_all Not tainted 5.18.0-0.rc5.9050ba3a61a4b5b.41.test.fc37.ppc64le #1
[20065.324402] NIP: c000000000cb7c1c LR: c000000000cb7ba0 CTR: 0000000000000000
[20065.324468] REGS: c00000012a3e3660 TRAP: 0300 Not tainted (5.18.0-0.rc5.9050ba3a61a4b5b.41.test.fc37.ppc64le)
[20065.324560] MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 44008804 XER: 00000000
[20065.324638] CFAR: 0000000000002674 DAR: 00000000000000b8 DSISR: 40000000 IRQMASK: 0
[20065.324638] GPR00: c000000000cb7ba0 c00000012a3e3900 c000000002b5a500 c00000000cd610b0
[20065.324638] GPR04: c00000000c3bb0c8 0000000000000004 c00000012a3e37ac 000000023c480000
[20065.324638] GPR08: 000000023c480000 0000000000000000 0000000000000000 a0f18a8800000000
[20065.324638] GPR12: 0000000084008800 c00000003fffae80 0000000000000000 0000000000000000
[20065.324638] GPR16: 0000000010060878 0000000000000008 00000000100607b8 c00000000c3bb058
[20065.324638] GPR20: c00000000c3bb048 c00000000a034fe0 5deadbeef0000100 5deadbeef0000122
[20065.324638] GPR24: fffffffffffff000 c00000012a3e3920 0000000000000000 c00000012a3e3930
[20065.324638] GPR28: c00000012a3e3a20 c00000000cf74c00 c0000000014ab060 c00000001a765ef0
[20065.325248] NIP [c000000000cb7c1c] iommu_get_group_resv_regions+0xcc/0x490
[20065.325310] LR [c000000000cb7ba0] iommu_get_group_resv_regions+0x50/0x490
[20065.325368] Call Trace:
[20065.325391] [c00000012a3e3900] [c000000000cb7ba0] iommu_get_group_resv_regions+0x50/0x490 (unreliable)
[20065.325474] [c00000012a3e39c0] [c000000000cb8024] iommu_group_show_resv_regions+0x44/0x140
[20065.325544] [c00000012a3e3a70] [c000000000cb5478] iommu_group_attr_show+0x38/0x60
[20065.325616] [c00000012a3e3a90] [c00000000070536c] sysfs_kf_seq_show+0xbc/0x1d0
[20065.325686] [c00000012a3e3b20] [c000000000702124] kernfs_seq_show+0x44/0x60
[20065.325746] [c00000012a3e3b40] [c00000000061bd2c] seq_read_iter+0x26c/0x6e0
[20065.325805] [c00000012a3e3c20] [c000000000702fd4] kernfs_fop_read_iter+0x254/0x2e0
[20065.325877] [c00000012a3e3c70] [c0000000005cf3d0] new_sync_read+0x110/0x170
[20065.325938] [c00000012a3e3d10] [c0000000005d2770] vfs_read+0x1d0/0x240
[20065.326002] [c00000012a3e3d60] [c0000000005d2ee8] ksys_read+0x78/0x130
[20065.326063] [c00000012a3e3db0] [c0000000000303f8] system_call_exception+0x198/0x3a0
[20065.326133] [c00000012a3e3e10] [c00000000000c64c] system_call_common+0xec/0x250
[20065.326205] --- interrupt: c00 at 0x7fff849dda64
[20065.326253] NIP: 00007fff849dda64 LR: 00000000100124a4 CTR: 0000000000000000
[20065.326319] REGS: c00000012a3e3e80 TRAP: 0c00 Not tainted (5.18.0-0.rc5.9050ba3a61a4b5b.41.test.fc37.ppc64le)
[20065.326408] MSR: 800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE> CR: 28002802 XER: 00000000
[20065.326501] IRQMASK: 0
[20065.326501] GPR00: 0000000000000003 00007ffff1cb8fd0 00007fff84af6e00 0000000000000003
[20065.326501] GPR04: 00007ffff1cb9070 00000000000003ff 0000000000000000 0000000000000000
[20065.326501] GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[20065.326501] GPR12: 0000000000000000 00007fff84bca640 0000000000000000 0000000000000000
[20065.326501] GPR16: 0000000010060878 0000000000000008 00000000100607b8 0000000010044cc0
[20065.326501] GPR20: 0000000000000000 0000000010044640 00007ffff1cb949a 00000000100404c8
[20065.326501] GPR24: 0000000010040140 0000000010060660 0000000010040110 0000000010040020
[20065.326501] GPR28: 0000000010060698 00007fff84880000 00007ffff1cb909b 0000000000000003
[20065.327053] NIP [00007fff849dda64] 0x7fff849dda64
[20065.327099] LR [00000000100124a4] 0x100124a4
[20065.327144] --- interrupt: c00
[20065.327179] Instruction dump:
[20065.327214] 66f7f000 fbc100b0 fb210088 62d60100 3b210020 fb610098 62f70122 3b610030
[20065.327289] e8750010 fb210020 fb210028 e9230560 <e92900b8> e9290010 e9890030 2c2c0000
[20065.327367] ---[ end trace 0000000000000000 ]---
# ll /sys/kernel/iommu_groups/0/devices/
total 0
lrwxrwxrwx. 1 root root 0 May 4 03:08 0000:00:01.0 -> ../../../../devices/pci0000:00/0000:00:01.0
lrwxrwxrwx. 1 root root 0 May 4 03:08 0000:00:02.0 -> ../../../../devices/pci0000:00/0000:00:02.0
lrwxrwxrwx. 1 root root 0 May 4 03:08 0000:00:03.0 -> ../../../../devices/pci0000:00/0000:00:03.0
lrwxrwxrwx. 1 root root 0 May 4 03:08 0000:00:04.0 -> ../../../../devices/pci0000:00/0000:00:04.0
# lspci -v
00:01.0 Ethernet controller: Red Hat, Inc. Virtio network device
Subsystem: Red Hat, Inc. Device 0001
Device tree node: /sys/firmware/devicetree/base/pci@...000020000000/ethernet@1
Flags: bus master, fast devsel, latency 0, IRQ 20, IOMMU group 0
I/O ports at 0080 [size=32]
Memory at 100b0002000 (32-bit, non-prefetchable) [size=4K]
Expansion ROM at 100b0040000 [disabled] [size=256K]
Capabilities: [40] MSI-X: Enable+ Count=3 Masked-
Kernel driver in use: virtio-pci
00:02.0 USB controller: Apple Inc. KeyLargo/Intrepid USB (prog-if 10 [OHCI])
Subsystem: Red Hat, Inc. QEMU Virtual Machine
Device tree node: /sys/firmware/devicetree/base/pci@...000020000000/usb@2
Flags: bus master, fast devsel, latency 0, IRQ 19, IOMMU group 0
Memory at 100b0001000 (32-bit, non-prefetchable) [size=256]
Kernel driver in use: ohci-pci
00:03.0 SCSI storage controller: Red Hat, Inc. Virtio block device
Subsystem: Red Hat, Inc. Device 0002
Device tree node: /sys/firmware/devicetree/base/pci@...000020000000/scsi@3
Flags: bus master, fast devsel, latency 0, IRQ 18, IOMMU group 0
I/O ports at 0040 [size=64]
Memory at 100b0000000 (32-bit, non-prefetchable) [size=4K]
Capabilities: [40] MSI-X: Enable+ Count=2 Masked-
Kernel driver in use: virtio-pci
00:04.0 Unclassified device [00ff]: Red Hat, Inc. Virtio memory balloon
Subsystem: Red Hat, Inc. Device 0005
Device tree node: /sys/firmware/devicetree/base/pci@...000020000000/unknown-legacy-device@4
Flags: bus master, fast devsel, latency 0, IRQ 17, IOMMU group 0
I/O ports at 0020 [size=32]
Kernel driver in use: virtio-pci
full console log from CKI test run:
https://s3.amazonaws.com/arr-cki-prod-trusted-artifacts/trusted-artifacts/530073525/test%20ppc64le/2407075056/artifacts/test_tasks/recipes/11913490/logs/console.log
kernel config:
https://s3.amazonaws.com/arr-cki-prod-trusted-artifacts/trusted-artifacts/530073525/publish%20ppc64le/2407075021/artifacts/kernel-ppc64le.config
Regards,
Jan
Powered by blists - more mailing lists