[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4FBEA887.6010908@redhat.com>
Date: Thu, 24 May 2012 17:30:47 -0400
From: Don Dutile <ddutile@...hat.com>
To: Alex Williamson <alex.williamson@...hat.com>
CC: benh@...nel.crashing.org, aik@...abs.ru,
david@...son.dropbear.id.au, joerg.roedel@....com,
dwmw2@...radead.org, chrisw@...s-sol.org, agraf@...e.de,
benve@...co.com, aafabbri@...co.com, B08248@...escale.com,
B07421@...escale.com, avi@...hat.com, konrad.wilk@...cle.com,
kvm@...r.kernel.org, qemu-devel@...gnu.org,
iommu@...ts.linux-foundation.org, linux-pci@...r.kernel.org,
linux-kernel@...r.kernel.org, gregkh@...uxfoundation.org,
bhelgaas@...gle.com
Subject: Re: [PATCH v2 05/13] pci: Add ACS validation utility
On 05/22/2012 01:05 AM, Alex Williamson wrote:
> In a PCI environment, transactions aren't always required to reach
> the root bus before being re-routed. Intermediate switches between
> an endpoint and the root bus can redirect DMA back downstream before
> things like IOMMUs have a chance to intervene. Legacy PCI is always
> susceptible to this as it operates on a shared bus. PCIe added a
> new capability to describe and control this behavior, Access Control
> Services, or ACS. The utility function pci_acs_enabled() allows us
> to test the ACS capabilities of an individual devices against a set
> of flags while pci_acs_path_enabled() tests a complete path from
> a given downstream device up to the specified upstream device. We
> also include the ability to add device specific tests as it's
> likely we'll see devices that do no implement ACS, but want to
> indicate support for various capabilities in this space.
>
> Signed-off-by: Alex Williamson<alex.williamson@...hat.com>
> ---
>
> drivers/pci/pci.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++
> drivers/pci/quirks.c | 29 +++++++++++++++++++
> include/linux/pci.h | 10 ++++++-
> 3 files changed, 114 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 111569c..ab6c2a6 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -2359,6 +2359,82 @@ void pci_enable_acs(struct pci_dev *dev)
> }
>
> /**
> + * pci_acs_enable - test ACS against required flags for a given device
typo: ^^^ missing 'd'
> + * @pdev: device to test
> + * @acs_flags: required PCI ACS flags
> + *
> + * Return true if the device supports the provided flags. Automatically
> + * filters out flags that are not implemented on multifunction devices.
> + */
> +bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags)
> +{
> + int pos;
> + u16 ctrl;
> +
> + if (pci_dev_specific_acs_enabled(pdev, acs_flags))
> + return true;
> +
> + if (!pci_is_pcie(pdev))
> + return false;
> +
> + if (pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM ||
> + pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
> + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS);
> + if (!pos)
> + return false;
> +
> + pci_read_config_word(pdev, pos + PCI_ACS_CTRL,&ctrl);
> + if ((ctrl& acs_flags) != acs_flags)
> + return false;
> + } else if (pdev->multifunction) {
> + /* Filter out flags not applicable to multifunction */
> + acs_flags&= (PCI_ACS_RR | PCI_ACS_CR |
> + PCI_ACS_EC | PCI_ACS_DT);
> +
> + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS);
> + if (!pos)
> + return false;
> +
> + pci_read_config_word(pdev, pos + PCI_ACS_CTRL,&ctrl);
> + if ((ctrl& acs_flags) != acs_flags)
> + return false;
> + }
> +
> + return true;
or, to reduce duplicated code (which compiler may do?):
/* Filter out flags not applicable to multifunction */
if (pdev->multifunction)
acs_flags &= (PCI_ACS_RR | PCI_ACS_CR |
PCI_ACS_EC | PCI_ACS_DT);
if (pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM ||
pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
pdev->multifunction) {
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS);
if (!pos)
return false;
pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);
if ((ctrl & acs_flags) != acs_flags)
return false;
}
return true;
> +}
But the above doesn't handle the case where the RC does not do
peer-to-peer btwn root ports. Per ACS spec, such a RC's root ports
don't need to provide an ACS cap, since peer-to-peer port xfers aren't
allowed/enabled/supported, so by design, the root port is ACS compliant.
ATM, an IOMMU-capable system is a pre-req for VFIO,
and all such systems have an ACS cap, but they may not always be true.
> +EXPORT_SYMBOL_GPL(pci_acs_enabled);
> +
> +/**
> + * pci_acs_path_enable - test ACS flags from start to end in a hierarchy
> + * @start: starting downstream device
> + * @end: ending upstream device or NULL to search to the root bus
> + * @acs_flags: required flags
> + *
> + * Walk up a device tree from start to end testing PCI ACS support. If
> + * any step along the way does not support the required flags, return false.
> + */
> +bool pci_acs_path_enabled(struct pci_dev *start,
> + struct pci_dev *end, u16 acs_flags)
> +{
> + struct pci_dev *pdev, *parent = start;
> +
> + do {
> + pdev = parent;
> +
> + if (!pci_acs_enabled(pdev, acs_flags))
> + return false;
> +
> + if (pci_is_root_bus(pdev->bus))
> + return (end == NULL);
doesn't this mean that a caller can't pass the pdev of the root port?
I would think that is a valid call, albeit not the common one.
Also worried that the above code may be true on Intel machines, but not on AMD
machines (the latter reps its IOMMU as a pdev of root bus, doesn't it?)
> +
> + parent = pdev->bus->self;
> + } while (pdev != end);
> +
> + return true;
> +}
> +EXPORT_SYMBOL_GPL(pci_acs_path_enabled);
> +
> +/**
> * pci_swizzle_interrupt_pin - swizzle INTx for device behind bridge
> * @dev: the PCI device
> * @pin: the INTx pin (1=INTA, 2=INTB, 3=INTD, 4=INTD)
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index a2dd77f..4ed6aa6 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -3149,3 +3149,32 @@ struct pci_dev *pci_dma_source(struct pci_dev *dev)
>
> return dev;
> }
> +
> +static const struct pci_dev_acs_enabled {
> + u16 vendor;
> + u16 device;
> + bool (*acs_enabled)(struct pci_dev *dev, u16 acs_flags);
> +} pci_dev_acs_enabled[] = {
> + { 0 }
> +};
> +
> +bool pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags)
> +{
> + const struct pci_dev_acs_enabled *i;
> +
> + /*
> + * Allow devices that do not expose standard PCI ACS capabilities
> + * or control to indicate their support here. Multi-function devices
> + * which do not allow internal peer-to-peer between functions, but
> + * do not implement PCI ACS may wish to return true here.
> + */
> + for (i = pci_dev_acs_enabled; i->acs_enabled; i++) {
> + if ((i->vendor == dev->vendor ||
> + i->vendor == (u16)PCI_ANY_ID)&&
> + (i->device == dev->device ||
> + i->device == (u16)PCI_ANY_ID))
> + return i->acs_enabled(dev, acs_flags);
> + }
> +
> + return false;
> +}
I can't wait until these quirks are filled in! :)
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 02dbfed..2559735 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1480,6 +1480,7 @@ enum pci_fixup_pass {
> #ifdef CONFIG_PCI_QUIRKS
> void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
> struct pci_dev *pci_dma_source(struct pci_dev *dev);
> +bool pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
> #else
> static inline void pci_fixup_device(enum pci_fixup_pass pass,
> struct pci_dev *dev) {}
> @@ -1487,6 +1488,11 @@ static inline struct pci_dev *pci_dma_source(struct pci_dev *dev)
> {
> return dev;
> }
> +static inline bool pci_dev_specific_acs_enabled(struct pci_dev *dev,
> + u16 acs_flags)
> +{
> + return false;
> +}
> #endif
>
> void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
> @@ -1589,7 +1595,9 @@ static inline bool pci_is_pcie(struct pci_dev *dev)
> }
>
> void pci_request_acs(void);
> -
> +bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags);
> +bool pci_acs_path_enabled(struct pci_dev *start,
> + struct pci_dev *end, u16 acs_flags);
>
> #define PCI_VPD_LRDT 0x80 /* Large Resource Data Type */
> #define PCI_VPD_LRDT_ID(x) (x | PCI_VPD_LRDT)
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists