linux-kernel - Re: [PATCH v4 07/10] s390/pci: Store PCI error information for passthrough devices

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d22cb26b864362454ace07ed5fcb9758c40ee32e.camel@linux.ibm.com>
Date: Thu, 25 Sep 2025 16:28:04 +0200
From: Niklas Schnelle <schnelle@...ux.ibm.com>
To: Farhan Ali <alifm@...ux.ibm.com>, linux-s390@...r.kernel.org,
        kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-pci@...r.kernel.org
Cc: alex.williamson@...hat.com, helgaas@...nel.org, clg@...hat.com,
        mjrosato@...ux.ibm.com
Subject: Re: [PATCH v4 07/10] s390/pci: Store PCI error information for
 passthrough devices

On Wed, 2025-09-24 at 10:16 -0700, Farhan Ali wrote:
> For a passthrough device we need co-operation from user space to recover
> the device. This would require to bubble up any error information to user
> space.  Let's store this error information for passthrough devices, so it
> can be retrieved later.
> 
> Signed-off-by: Farhan Ali <alifm@...ux.ibm.com>
> ---
>  arch/s390/include/asm/pci.h      | 28 ++++++++++
>  arch/s390/pci/pci.c              |  1 +
>  arch/s390/pci/pci_event.c        | 95 +++++++++++++++++++-------------
>  drivers/vfio/pci/vfio_pci_zdev.c |  2 +
>  4 files changed, 88 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
> index f47f62fc3bfd..40bfe5721109 100644
> --- a/arch/s390/include/asm/pci.h
> +++ b/arch/s390/include/asm/pci.h
> @@ -116,6 +116,31 @@ struct zpci_bus {
>  	enum pci_bus_speed	max_bus_speed;
>  };
>  
> +/* Content Code Description for PCI Function Error */
> +struct zpci_ccdf_err {
> +	u32 reserved1;
> +	u32 fh;                         /* function handle */
> +	u32 fid;                        /* function id */
> +	u32 ett         :  4;           /* expected table type */
> +	u32 mvn         : 12;           /* MSI vector number */
> +	u32 dmaas       :  8;           /* DMA address space */
> +	u32 reserved2   :  6;
> +	u32 q           :  1;           /* event qualifier */
> +	u32 rw          :  1;           /* read/write */
> +	u64 faddr;                      /* failing address */
> +	u32 reserved3;
> +	u16 reserved4;
> +	u16 pec;                        /* PCI event code */
> +} __packed;
> +
> +#define ZPCI_ERR_PENDING_MAX 4
> +struct zpci_ccdf_pending {
> +	u8 count;
> +	u8 head;
> +	u8 tail;
> +	struct zpci_ccdf_err err[ZPCI_ERR_PENDING_MAX];
> +};

Thanks this looks more reasonably sized. 

> +
>  /* Private data per function */
>  struct zpci_dev {
>  	struct zpci_bus *zbus;
> @@ -191,6 +216,8 @@ struct zpci_dev {
>  	struct iommu_domain *s390_domain; /* attached IOMMU domain */
>  	struct kvm_zdev *kzdev;
>  	struct mutex kzdev_lock;
> +	struct zpci_ccdf_pending pending_errs;
> +	struct mutex pending_errs_lock;
>  	spinlock_t dom_lock;		/* protect s390_domain change */
>  };
>  
--- snip ---
> +static void zpci_store_pci_error(struct pci_dev *pdev,
> +				 struct zpci_ccdf_err *ccdf)
> +{
> +	struct zpci_dev *zdev = to_zpci(pdev);
> +	int i;
> +
> +	mutex_lock(&zdev->pending_errs_lock);
> +	if (zdev->pending_errs.count >= ZPCI_ERR_PENDING_MAX) {
> +		pr_err("%s: Maximum number (%d) of pending error events queued",
> +				pci_name(pdev), ZPCI_ERR_PENDING_MAX);

So for a vfio-pci user which doesn't pick up the error information but
does reset on error and thus recovers we would leave just the 4 first
errors that occurred in the pending_errs and get this message once. I
think that is okay and maybe even preferrable since most errors are,
well, errors. And often the first time something went wrong is the
interesting one. So I think this makes sense.

> +		mutex_unlock(&zdev->pending_errs_lock);
> +		return;
> +	}
> +
> +	i = zdev->pending_errs.tail % ZPCI_ERR_PENDING_MAX;
> +	memcpy(&zdev->pending_errs.err[i], ccdf, sizeof(struct zpci_ccdf_err));
> +	zdev->pending_errs.tail++;
> +	zdev->pending_errs.count++;
> +	mutex_unlock(&zdev->pending_errs_lock);
> +}
> +
> +void zpci_cleanup_pending_errors(struct zpci_dev *zdev)
> +{
> +	struct pci_dev *pdev = NULL;
> +
> +	mutex_lock(&zdev->pending_errs_lock);
> +	pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);

I think you missed my comment on the previous version. This is missing
the matching pci_dev_put() for the pci_get_slot().

> +	if (zdev->pending_errs.count)
> +		pr_info("%s: Unhandled PCI error events count=%d",
> +				pci_name(pdev), zdev->pending_errs.count);
> +	memset(&zdev->pending_errs, 0, sizeof(struct zpci_ccdf_pending));
> +	mutex_unlock(&zdev->pending_errs_lock);
> +}
> +EXPORT_SYMBOL_GPL(zpci_cleanup_pending_errors);
> +
> 
--- snip ---
>  
> @@ -322,12 +340,13 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
>  		break;
>  	case 0x0040: /* Service Action or Error Recovery Failed */
>  	case 0x003b:
> -		zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
> +		zpci_event_io_failure(pdev, pci_channel_io_perm_failure, ccdf);
>  		break;
>  	default: /* PCI function left in the error state attempt to recover */
> -		ers_res = zpci_event_attempt_error_recovery(pdev);
> +		ers_res = zpci_event_attempt_error_recovery(pdev, ccdf);
>  		if (ers_res != PCI_ERS_RESULT_RECOVERED)
> -			zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
> +			zpci_event_io_failure(pdev, pci_channel_io_perm_failure,
> +					ccdf);

Nit: I'd just keep the above on one line. It's still below the 100
columns limit and just cleaner on one line.

>  		break;
>  	}
>  	pci_dev_put(pdev);
> diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
> index a7bc23ce8483..2be37eab9279 100644
> --- a/drivers/vfio/pci/vfio_pci_zdev.c
> +++ b/drivers/vfio/pci/vfio_pci_zdev.c
> @@ -168,6 +168,8 @@ void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
>  
>  	zdev->mediated_recovery = false;
>  
> +	zpci_cleanup_pending_errors(zdev);
> +
>  	if (!vdev->vdev.kvm)
>  		return;
>