linux-kernel - Re: [PATCH 10/11] PCI: mvebu: Implement support for legacy INTx interrupts

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220106154447.aie6taiuvav5wu6y@pali>
Date:   Thu, 6 Jan 2022 16:44:47 +0100
From:   Pali Rohár <pali@...nel.org>
To:     Marc Zyngier <maz@...nel.org>
Cc:     Lorenzo Pieralisi <lorenzo.pieralisi@....com>,
        Bjorn Helgaas <bhelgaas@...gle.com>,
        Rob Herring <robh+dt@...nel.org>,
        Thomas Petazzoni <thomas.petazzoni@...tlin.com>,
        Krzysztof Wilczyński <kw@...ux.com>,
        Marek Behún <kabel@...nel.org>,
        Russell King <rmk+kernel@...linux.org.uk>,
        linux-pci@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-arm-kernel@...ts.infradead.org
Subject: Re: [PATCH 10/11] PCI: mvebu: Implement support for legacy INTx
 interrupts

On Thursday 06 January 2022 15:28:20 Marc Zyngier wrote:
> On Wed, 05 Jan 2022 15:02:38 +0000,
> Pali Rohár <pali@...nel.org> wrote:
> > 
> > This adds support for legacy INTx interrupts received from other PCIe
> > devices and which are reported by a new INTx irq chip.
> > 
> > With this change, kernel can distinguish between INTA, INTB, INTC and INTD
> > interrupts.
> > 
> > Note that for this support, device tree files has to be properly adjusted
> > to provide "interrupts" or "interrupts-extended" property with intx
> > interrupt source, "interrupt-names" property with "intx" string and also
> > 'interrupt-controller' subnode must be defined.
> > 
> > If device tree files do not provide these nodes then driver would work as
> > before.
> > 
> > Signed-off-by: Pali Rohár <pali@...nel.org>
> > ---
> >  drivers/pci/controller/pci-mvebu.c | 182 +++++++++++++++++++++++++++--
> >  1 file changed, 174 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
> > index 1e90ab888075..04bcdd7b7a6d 100644
> > --- a/drivers/pci/controller/pci-mvebu.c
> > +++ b/drivers/pci/controller/pci-mvebu.c
> > @@ -54,9 +54,10 @@
> >  	 PCIE_CONF_ADDR_EN)
> >  #define PCIE_CONF_DATA_OFF	0x18fc
> >  #define PCIE_INT_CAUSE_OFF	0x1900
> > +#define PCIE_INT_UNMASK_OFF	0x1910
> > +#define  PCIE_INT_INTX(i)		BIT(24+i)
> >  #define  PCIE_INT_PM_PME		BIT(28)
> > -#define PCIE_MASK_OFF		0x1910
> > -#define  PCIE_MASK_ENABLE_INTS          0x0f000000
> > +#define  PCIE_INT_ALL_MASK		GENMASK(31, 0)
> >  #define PCIE_CTRL_OFF		0x1a00
> >  #define  PCIE_CTRL_X1_MODE		0x0001
> >  #define  PCIE_CTRL_RC_MODE		BIT(1)
> > @@ -110,6 +111,10 @@ struct mvebu_pcie_port {
> >  	struct mvebu_pcie_window iowin;
> >  	u32 saved_pcie_stat;
> >  	struct resource regs;
> > +	struct irq_domain *intx_irq_domain;
> > +	struct irq_chip intx_irq_chip;
> 
> Why is this structure per port? It really should be global. Printing
> the port number in the name isn't enough of a reason.

Because each port has its own independent set of INTA-INTD interrupts.

> > +	raw_spinlock_t irq_lock;
> > +	int intx_irq;
> >  };
> >  
> >  static inline void mvebu_writel(struct mvebu_pcie_port *port, u32 val, u32 reg)
> > @@ -235,7 +240,7 @@ static void mvebu_pcie_setup_wins(struct mvebu_pcie_port *port)
> >  
> >  static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> >  {
> > -	u32 ctrl, lnkcap, cmd, dev_rev, mask;
> > +	u32 ctrl, lnkcap, cmd, dev_rev, unmask;
> >  
> >  	/* Setup PCIe controller to Root Complex mode. */
> >  	ctrl = mvebu_readl(port, PCIE_CTRL_OFF);
> > @@ -288,10 +293,30 @@ static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port)
> >  	/* Point PCIe unit MBUS decode windows to DRAM space. */
> >  	mvebu_pcie_setup_wins(port);
> >  
> > -	/* Enable interrupt lines A-D. */
> > -	mask = mvebu_readl(port, PCIE_MASK_OFF);
> > -	mask |= PCIE_MASK_ENABLE_INTS;
> > -	mvebu_writel(port, mask, PCIE_MASK_OFF);
> > +	/* Mask all interrupt sources. */
> > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
> > +
> > +	/* Clear all interrupt causes. */
> > +	mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
> > +
> > +	if (port->intx_irq <= 0) {
> > +		/*
> > +		 * When neither "summary" interrupt, nor "intx" interrupt was
> > +		 * specified in DT then unmask all legacy INTx interrupts as in
> > +		 * this case driver does not provide a way for masking and
> > +		 * unmasking of individual legacy INTx interrupts. In this case
> > +		 * all interrupts, including legacy INTx are reported via one
> > +		 * shared GIC source and therefore kernel cannot distinguish
> > +		 * which individual legacy INTx was triggered. These interrupts
> > +		 * are shared, so it should not cause any issue. Just
> > +		 * performance penalty as every PCIe interrupt handler needs to
> > +		 * be called when some interrupt is triggered.
> > +		 */
> > +		unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > +		unmask |= PCIE_INT_INTX(0) | PCIE_INT_INTX(1) |
> > +			  PCIE_INT_INTX(2) | PCIE_INT_INTX(3);
> > +		mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> 
> Maybe worth printing a warning here, so that the user knows they are
> on thin ice.

Ok. I can add it here. Anyway, this is default current state without
this patch.

> > +	}
> >  }
> >  
> >  static struct mvebu_pcie_port *mvebu_pcie_find_port(struct mvebu_pcie *pcie,
> > @@ -924,6 +949,109 @@ static struct pci_ops mvebu_pcie_ops = {
> >  	.write = mvebu_pcie_wr_conf,
> >  };
> >  
> > +static void mvebu_pcie_intx_irq_mask(struct irq_data *d)
> > +{
> > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > +	unsigned long flags;
> > +	u32 unmask;
> > +
> > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > +	unmask &= ~PCIE_INT_INTX(hwirq);
> > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > +}
> > +
> > +static void mvebu_pcie_intx_irq_unmask(struct irq_data *d)
> > +{
> > +	struct mvebu_pcie_port *port = d->domain->host_data;
> > +	irq_hw_number_t hwirq = irqd_to_hwirq(d);
> > +	unsigned long flags;
> > +	u32 unmask;
> > +
> > +	raw_spin_lock_irqsave(&port->irq_lock, flags);
> > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> > +	unmask |= PCIE_INT_INTX(hwirq);
> > +	mvebu_writel(port, unmask, PCIE_INT_UNMASK_OFF);
> > +	raw_spin_unlock_irqrestore(&port->irq_lock, flags);
> > +}
> > +
> > +static int mvebu_pcie_intx_irq_map(struct irq_domain *h,
> > +				   unsigned int virq, irq_hw_number_t hwirq)
> > +{
> > +	struct mvebu_pcie_port *port = h->host_data;
> > +
> > +	irq_set_status_flags(virq, IRQ_LEVEL);
> > +	irq_set_chip_and_handler(virq, &port->intx_irq_chip, handle_level_irq);
> > +	irq_set_chip_data(virq, port);
> > +
> > +	return 0;
> > +}
> > +
> > +static const struct irq_domain_ops mvebu_pcie_intx_irq_domain_ops = {
> > +	.map = mvebu_pcie_intx_irq_map,
> > +	.xlate = irq_domain_xlate_onecell,
> > +};
> > +
> > +static int mvebu_pcie_init_irq_domain(struct mvebu_pcie_port *port)
> > +{
> > +	struct device *dev = &port->pcie->pdev->dev;
> > +	struct device_node *pcie_intc_node;
> > +
> > +	raw_spin_lock_init(&port->irq_lock);
> > +
> > +	port->intx_irq_chip.name = devm_kasprintf(dev, GFP_KERNEL,
> > +						  "mvebu-%s-INTx",
> > +						  port->name);
> 
> That's exactly what I really don't want to see. It prevents sharing of
> the irq_chip structure, and gets in the way of making it const in the
> future. Yes, I know that some drivers do that. I can't fix those,
> because /proc/interrupts is ABI. But I really don't want to see more
> of these.

Well, I do not understand why it should be shared and with who. HW has N
independent IRQ chips for legacy interrupts. And each one will be
specified in DT per HW layout / design.

> /sys/kernel/debug/irqs already has all the information you need, as it
> will happily give you the domain name and the interrupt topology.
> 
> > +	port->intx_irq_chip.irq_mask = mvebu_pcie_intx_irq_mask;
> > +	port->intx_irq_chip.irq_unmask = mvebu_pcie_intx_irq_unmask;
> > +
> > +	pcie_intc_node = of_get_next_child(port->dn, NULL);
> > +	if (!pcie_intc_node) {
> > +		dev_err(dev, "No PCIe Intc node found for %s\n", port->name);
> > +		return -ENODEV;
> > +	}
> > +
> > +	port->intx_irq_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
> > +						      &mvebu_pcie_intx_irq_domain_ops,
> > +						      port);
> > +	of_node_put(pcie_intc_node);
> > +	if (!port->intx_irq_domain) {
> > +		devm_kfree(dev, port->intx_irq_chip.name);
> > +		dev_err(dev, "Failed to get INTx IRQ domain for %s\n", port->name);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void mvebu_pcie_irq_handler(struct irq_desc *desc)
> > +{
> > +	struct mvebu_pcie_port *port = irq_desc_get_handler_data(desc);
> > +	struct irq_chip *chip = irq_desc_get_chip(desc);
> > +	struct device *dev = &port->pcie->pdev->dev;
> > +	u32 cause, unmask, status;
> > +	int i;
> > +
> > +	chained_irq_enter(chip, desc);
> > +
> > +	cause = mvebu_readl(port, PCIE_INT_CAUSE_OFF);
> > +	unmask = mvebu_readl(port, PCIE_INT_UNMASK_OFF);
> 
> Why do you need to read this? If the CAUSE register also returns the
> masked interrupts that are pending,

Yes, it returns all interrupts, even those which are masked for which is
not system interested.

> it may be worth keeping a shadow
> copy of the this register, as you end-up having an extra MMIO read on
> each and every interrupt, which can't be great for performance.

Ok. I can set shadow copy of unmask register into port structure. But it
has really performance impact when reading directly unmask register from
hw and when reading it from port shadow copy structure?

> > +	status = cause & unmask;
> > +
> > +	/* Process legacy INTx interrupts */
> > +	for (i = 0; i < PCI_NUM_INTX; i++) {
> > +		if (!(status & PCIE_INT_INTX(i)))
> > +			continue;
> > +
> > +		if (generic_handle_domain_irq(port->intx_irq_domain, i) == -EINVAL)
> > +			dev_err_ratelimited(dev, "unexpected INT%c IRQ\n", (char)i+'A');
> > +	}
> > +
> > +	chained_irq_exit(chip, desc);
> > +}
> > +
> >  static int mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
> >  {
> >  	/* Interrupt support on mvebu emulated bridges is not implemented yet */
> > @@ -1121,6 +1249,16 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie,
> >  		port->io_attr = -1;
> >  	}
> >  
> > +	/*
> > +	 * Old DT bindings do not contain "intx" interrupt
> > +	 * so do not fail probing driver when interrupt does not exist.
> > +	 */
> > +	port->intx_irq = of_irq_get_byname(child, "intx");
> > +	if (port->intx_irq == -EPROBE_DEFER) {
> > +		ret = port->intx_irq;
> > +		goto err;
> > +	}
> > +
> >  	reset_gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, &flags);
> >  	if (reset_gpio == -EPROBE_DEFER) {
> >  		ret = reset_gpio;
> > @@ -1317,6 +1455,7 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
> >  
> >  	for (i = 0; i < pcie->nports; i++) {
> >  		struct mvebu_pcie_port *port = &pcie->ports[i];
> > +		int irq = port->intx_irq;
> >  
> >  		child = port->dn;
> >  		if (!child)
> > @@ -1344,6 +1483,22 @@ static int mvebu_pcie_probe(struct platform_device *pdev)
> >  			continue;
> >  		}
> >  
> > +		if (irq > 0) {
> > +			ret = mvebu_pcie_init_irq_domain(port);
> > +			if (ret) {
> > +				dev_err(dev, "%s: cannot init irq domain\n",
> > +					port->name);
> > +				pci_bridge_emul_cleanup(&port->bridge);
> > +				devm_iounmap(dev, port->base);
> > +				port->base = NULL;
> > +				mvebu_pcie_powerdown(port);
> > +				continue;
> > +			}
> > +			irq_set_chained_handler_and_data(irq,
> > +							 mvebu_pcie_irq_handler,
> > +							 port);
> > +		}
> > +
> >  		/*
> >  		 * PCIe topology exported by mvebu hw is quite complicated. In
> >  		 * reality has something like N fully independent host bridges
> > @@ -1448,6 +1603,7 @@ static int mvebu_pcie_remove(struct platform_device *pdev)
> >  
> >  	for (i = 0; i < pcie->nports; i++) {
> >  		struct mvebu_pcie_port *port = &pcie->ports[i];
> > +		int irq = port->intx_irq;
> >  
> >  		if (!port->base)
> >  			continue;
> > @@ -1458,7 +1614,17 @@ static int mvebu_pcie_remove(struct platform_device *pdev)
> >  		mvebu_writel(port, cmd, PCIE_CMD_OFF);
> >  
> >  		/* Mask all interrupt sources. */
> > -		mvebu_writel(port, 0, PCIE_MASK_OFF);
> > +		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_UNMASK_OFF);
> > +
> > +		/* Clear all interrupt causes. */
> > +		mvebu_writel(port, ~PCIE_INT_ALL_MASK, PCIE_INT_CAUSE_OFF);
> > +
> > +		/* Remove IRQ domains. */
> > +		if (port->intx_irq_domain)
> > +			irq_domain_remove(port->intx_irq_domain);
> > +
> > +		if (irq > 0)
> > +			irq_set_chained_handler_and_data(irq, NULL, NULL);
> >  
> >  		/* Free config space for emulated root bridge. */
> >  		pci_bridge_emul_cleanup(&port->bridge);
> 
> Thanks,
> 
> 	M.
> 
> -- 
> Without deviation from the norm, progress is not possible.