linux-kernel - Re: [PATCH v2 20/22] irqchip/gic-v5: Add GICv5 ITS support

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4f9f1dab-118e-4d5e-acf2-33da5e0a4905@kernel.org>
Date: Wed, 30 Apr 2025 09:28:08 +0200
From: Jiri Slaby <jirislaby@...nel.org>
To: Lorenzo Pieralisi <lpieralisi@...nel.org>, Marc Zyngier <maz@...nel.org>,
 Thomas Gleixner <tglx@...utronix.de>, Rob Herring <robh@...nel.org>,
 Krzysztof Kozlowski <krzk+dt@...nel.org>, Conor Dooley
 <conor+dt@...nel.org>, Catalin Marinas <catalin.marinas@....com>,
 Will Deacon <will@...nel.org>
Cc: Arnd Bergmann <arnd@...db.de>, Sascha Bischoff <sascha.bischoff@....com>,
 Timothy Hayes <timothy.hayes@....com>,
 "Liam R. Howlett" <Liam.Howlett@...cle.com>,
 Mark Rutland <mark.rutland@....com>, linux-arm-kernel@...ts.infradead.org,
 linux-kernel@...r.kernel.org, devicetree@...r.kernel.org
Subject: Re: [PATCH v2 20/22] irqchip/gic-v5: Add GICv5 ITS support

On 24. 04. 25, 12:25, Lorenzo Pieralisi wrote:
> --- /dev/null
> +++ b/drivers/irqchip/irq-gic-v5-its.c
> @@ -0,0 +1,1293 @@
...
> +static u32 its_readl_relaxed(struct gicv5_its_chip_data *its_node,
> +			     const u64 reg_offset)

I wonder -- can the offset be u64 at all?

> +{
> +	return readl_relaxed(its_node->its_base + reg_offset);
> +}
> +
> +static void its_writel_relaxed(struct gicv5_its_chip_data *its_node,
> +			       const u32 val, const u64 reg_offset)
> +{
> +	writel_relaxed(val, its_node->its_base + reg_offset);
> +}
> +
> +static void its_writeq_relaxed(struct gicv5_its_chip_data *its_node,
> +			       const u64 val, const u64 reg_offset)
> +{
> +	writeq_relaxed(val, its_node->its_base + reg_offset);
> +}
> +
> +static void its_write_table_entry(struct gicv5_its_chip_data *its,
> +				  __le64 *entry, u64 val)
> +{
> +	WRITE_ONCE(*entry, val);

This triggers a warning with the le/be checker enabled, right? You 
likely need cpu_to_le64() or __force.

> +	if (its->flags & ITS_FLAGS_NON_COHERENT)
> +		dcache_clean_inval_poc((unsigned long)entry,
> +				       (unsigned long)entry + sizeof(*entry));
> +	else
> +		dsb(ishst);
> +}
> +
> +#define gicv5_its_wait_for_op(base, reg, mask)				\

What's the purpose of this not being an inline?

> +	({								\
> +		int ret;						\
> +									\
> +		ret = gicv5_wait_for_op(base, reg, mask, NULL);		\
> +		if (unlikely(ret == -ETIMEDOUT))			\
> +			pr_err_ratelimited(#reg" timeout...\n");	\

Ah, this. Is it worth it? At least you should not clobber variables like 
"ret". Also grepping sources for "GICV5_ITS_STATUSR timeout..." would be 
clueless anyway. Yeah, at least there would be a driver prefix.

> +		ret;							\
> +	 })
...
> +static int gicv5_its_device_get_itte_ref(struct gicv5_its_dev *its_dev,
> +					 __le64 **itte, u16 event_id)
> +{
> +	if (!its_dev->itt_cfg.l2itt) {
> +		__le64 *itt = its_dev->itt_cfg.linear.itt;
> +		*itte = &itt[event_id];

Can you return 0 here and dedent the whole } else { block?

> +	} else {
> +		__le64 *l2_itt, *l1_itt = its_dev->itt_cfg.l2.l1itt;
> +		unsigned int l1_idx, l2_idx, l2_size, l2_bits;
> +		int ret;
> +
> +		ret = gicv5_its_l2sz_to_l2_bits(its_dev->itt_cfg.l2.l2sz);
> +		if (ret < 0)
> +			return ret;
> +		l2_bits = ret;
> +
> +		l1_idx = event_id >> l2_bits;
> +
> +		if (!FIELD_GET(GICV5_ITTL1E_VALID,
> +			       le64_to_cpu(l1_itt[l1_idx]))) {
> +			pr_debug("L1 ITT entry is not valid.\n");
> +			return -EINVAL;
> +		}
> +
> +		l2_idx = event_id & GENMASK(l2_bits - 1, 0);
> +
> +		l2_size = BIT(FIELD_GET(GICV5_ITTL1E_SPAN,
> +					le64_to_cpu(l1_itt[l1_idx])));
> +
> +		// Sanity check our indexing
> +		if (l2_idx >= l2_size) {
> +			pr_debug("L2 ITT index (%u) exceeds L2 table size (%u)!\n",
> +			       l2_idx, l2_size);
> +			return -EINVAL;
> +		}
> +		l2_itt = its_dev->itt_cfg.l2.l2ptrs[l1_idx];
> +		*itte = &l2_itt[l2_idx];
> +	}
> +
> +	return 0;
> +}

...
> +static struct gicv5_its_dev *gicv5_its_alloc_device(
> +				struct gicv5_its_chip_data *its, int nvec,
> +				u32 dev_id)
> +{
> +	struct gicv5_its_dev *its_dev;
> +	int ret;
> +
> +	its_dev = gicv5_its_find_device(its, dev_id);
> +	if (!IS_ERR(its_dev)) {
> +		pr_debug("A device with this DeviceID (0x%x) has already been registered.\n",
> +			 dev_id);
> +
> +		if (nvec > its_dev->num_events) {
> +			pr_debug("Requesting more ITT entries than allocated\n");

Why only _debug()?

> +			return ERR_PTR(-ENXIO);
> +		}
> +
> +		its_dev->shared = true;
> +
> +		return its_dev;
> +	}
> +
> +	its_dev = kzalloc(sizeof(*its_dev), GFP_KERNEL);
> +	if (!its_dev)
> +		return ERR_PTR(-ENOMEM);
> +
> +	its_dev->device_id = dev_id;
> +	its_dev->num_events = nvec;
> +	its_dev->num_mapped_events = 0;
> +
> +	ret = gicv5_its_device_register(its, its_dev);
> +	if (ret) {
> +		pr_debug("Failed to register the device\n");

And here.

> +		kfree(its_dev);

Can you use __free() and return_ptr() instead?

> +		return ERR_PTR(ret);
> +	}
> +
> +	gicv5_its_device_cache_inv(its, its_dev);
> +
> +	/*
> +	 * This is the first time we have seen this device. Hence, it is not
> +	 * shared.
> +	 */
> +	its_dev->shared = false;
> +
> +	its_dev->its_node = its;
> +
> +	its_dev->event_map =
> +		(unsigned long *)bitmap_zalloc(its_dev->num_events, GFP_KERNEL);
> +	if (!its_dev->event_map) {
> +		gicv5_its_device_unregister(its, its_dev);
> +		kfree(its_dev);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	xa_store(&its->its_devices, dev_id, its_dev, GFP_KERNEL);

This can fail.

> +
> +	return its_dev;
> +}

...
> +static int gicv5_its_alloc_event(struct gicv5_its_dev *its_dev, u16 event_id,
> +				 u32 lpi)
> +{
> +	struct gicv5_its_chip_data *its = its_dev->its_node;
> +	u64 itt_entry;
> +	__le64 *itte;
> +	int ret;
> +
> +	if (event_id >= its_dev->num_events) {
> +		pr_debug("EventID 0x%x outside of ITT range (0x%x)\n", event_id,
> +		       its_dev->num_events);

Again, is this so often to be _debug()?

> +		return -EINVAL;
> +	}
> +
> +	if (WARN(its_dev->num_mapped_events == its_dev->num_events,
> +		"Reached maximum number of events\n"))

Weird indent level.

> +		return -EINVAL;
> +
> +	ret = gicv5_its_device_get_itte_ref(its_dev, &itte, event_id);
> +	if (ret)
> +		return ret;
> +
> +	if (FIELD_GET(GICV5_ITTL2E_VALID, *itte))
> +		return -EEXIST;
> +
> +	itt_entry = FIELD_PREP(GICV5_ITTL2E_LPI_ID, lpi) |
> +		    FIELD_PREP(GICV5_ITTL2E_VALID, 0x1);
> +
> +	its_write_table_entry(its, itte, cpu_to_le64(itt_entry));
> +
> +	gicv5_its_itt_cache_inv(its, its_dev->device_id, event_id);
> +
> +	its_dev->num_mapped_events += 1;

This is not python, we have ++ :).

> +
> +	return 0;
> +}
> +
> +static void gicv5_its_free_event(struct gicv5_its_dev *its_dev, u16 event_id)
> +{
> +	struct gicv5_its_chip_data *its = its_dev->its_node;
> +	u64 itte_val;
> +	__le64 *itte;
> +	int ret;
> +
> +	if (WARN(!its_dev->num_mapped_events, "No mapped events\n"))
> +		return;
> +
> +	ret = gicv5_its_device_get_itte_ref(its_dev, &itte, event_id);
> +	if (ret) {
> +		pr_debug("Failed to get the ITTE!\n");
> +		return;
> +	}
> +
> +	itte_val = le64_to_cpu(*itte);
> +	itte_val &= ~GICV5_ITTL2E_VALID;
> +
> +	its_write_table_entry(its, itte, cpu_to_le64(itte_val));
> +
> +	gicv5_its_itt_cache_inv(its, its_dev->device_id, event_id);
> +
> +	its_dev->num_mapped_events -= 1;

And --.

> +}
> +
> +static int gicv5_its_alloc_eventid(struct gicv5_its_dev *its_dev,
> +				   unsigned int nr_irqs, u32 *eventid)
> +{
> +	int ret;
> +
> +	ret = bitmap_find_free_region(its_dev->event_map,
> +				      its_dev->num_events,
> +				      get_count_order(nr_irqs));
> +
> +	if (ret < 0)
> +		return ret;
> +
> +	*eventid = ret;
> +
> +	return 0;
> +}
...
> +static int gicv5_its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
> +				      unsigned int nr_irqs, void *arg)
> +{
> +	u32 device_id, event_id_base, lpi;
> +	struct msi_domain_info *msi_info;
> +	struct gicv5_its_chip_data *its;
> +	struct gicv5_its_dev *its_dev;
> +	msi_alloc_info_t *info = arg;
> +	irq_hw_number_t hwirq;
> +	struct irq_data *irqd;
> +	int ret, i;

Why is i not unsigned too?

> +
> +	device_id = info->scratchpad[0].ul;
> +
> +	msi_info = msi_get_domain_info(domain);
> +	its = msi_info->data;
> +
> +	mutex_lock(&its->dev_alloc_lock);
> +
> +	its_dev = gicv5_its_find_device(its, device_id);
> +	if (IS_ERR(its_dev)) {
> +		mutex_unlock(&its->dev_alloc_lock);

scope_guard() would make much sense here.

> +		return PTR_ERR(its_dev);
> +	}
> +
> +	ret = gicv5_its_alloc_eventid(its_dev, nr_irqs, &event_id_base);
> +	if (ret) {
> +		mutex_unlock(&its->dev_alloc_lock);
> +		return ret;
> +	}
> +
> +	mutex_unlock(&its->dev_alloc_lock);
> +
> +	ret = iommu_dma_prepare_msi(info->desc, its->its_trans_phys_base);
> +	if (ret)
> +		goto out_eventid;
> +
> +	for (i = 0; i < nr_irqs; i++) {
> +		lpi = gicv5_alloc_lpi();
> +		if (ret < 0) {
> +			pr_debug("Failed to find free LPI!\n");
> +			goto out_eventid;
> +		}
> +
> +		ret = irq_domain_alloc_irqs_parent(domain, virq + i, 1, &lpi);
> +		if (ret)
> +			goto out_free_lpi;
> +
> +		/*
> +		 * Store eventid and deviceid into the hwirq for later use.
> +		 *
> +		 *	hwirq  = event_id << 32 | device_id
> +		 */
> +		hwirq = FIELD_PREP(GICV5_ITS_HWIRQ_DEVICE_ID, device_id) |
> +			FIELD_PREP(GICV5_ITS_HWIRQ_EVENT_ID, (u64)event_id_base + i);
> +		irq_domain_set_info(domain, virq + i, hwirq,
> +				    &gicv5_its_irq_chip, its_dev,
> +				    handle_fasteoi_irq, NULL, NULL);
> +
> +		irqd = irq_get_irq_data(virq + i);
> +		irqd_set_single_target(irqd);
> +		irqd_set_affinity_on_activate(irqd);
> +		irqd_set_resend_when_in_progress(irqd);
> +	}
> +
> +	return 0;
> +out_free_lpi:
> +	gicv5_free_lpi(lpi);
> +out_eventid:
> +	gicv5_its_free_eventid(its_dev, event_id_base, nr_irqs);
> +
> +	return ret;
> +}
> +
> +static void gicv5_its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
> +				      unsigned int nr_irqs)
> +{
> +	struct msi_domain_info *msi_info;
> +	struct gicv5_its_chip_data *its;
> +	struct gicv5_its_dev *its_dev;
> +	struct irq_data *d;
> +	u16 event_id_base;
> +	bool free_device;
> +	u32 device_id;
> +	int i;
> +
> +	msi_info = msi_get_domain_info(domain);
> +	its = msi_info->data;
> +
> +	d = irq_domain_get_irq_data(domain, virq);
> +	device_id = FIELD_GET(GICV5_ITS_HWIRQ_DEVICE_ID, d->hwirq);
> +	event_id_base = FIELD_GET(GICV5_ITS_HWIRQ_EVENT_ID, d->hwirq);
> +
> +	guard(mutex)(&its->dev_alloc_lock);
> +
> +	its_dev = gicv5_its_find_device(its, device_id);
> +	if (IS_ERR(its_dev)) {
> +		pr_debug("Couldn't find the ITS device!\n");

This is serious, not debug, IMO. Either we leak memory or even allow out 
of bounds accesses somewhere.

> +		return;
> +	}
> +
> +	bitmap_release_region(its_dev->event_map, event_id_base,
> +			      get_count_order(nr_irqs));
> +
> +	free_device = !its_dev->shared && bitmap_empty(its_dev->event_map,
> +						       its_dev->num_events);
> +
> +	/*  Hierarchically free irq data */
> +	for (i = 0; i < nr_irqs; i++) {
> +		d = irq_domain_get_irq_data(domain, virq + i);
> +
> +		gicv5_free_lpi(d->parent_data->hwirq);
> +		irq_domain_reset_irq_data(d);
> +	}
> +	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
> +
> +	gicv5_its_syncr(its, its_dev);
> +	gicv5_irs_syncr();
> +
> +	if (free_device) {
> +		gicv5_its_device_unregister(its, its_dev);
> +		bitmap_free(its_dev->event_map);
> +		xa_erase(&its->its_devices, device_id);
> +		kfree(its_dev);
> +	}
> +}

...
> +static int __init gicv5_its_init_bases(phys_addr_t its_trans_base,
> +				       void __iomem *its_base,
> +				       struct fwnode_handle *handle,
> +				       struct irq_domain *parent_domain)
> +{
> +	struct device_node *np = to_of_node(handle);
> +	struct gicv5_its_chip_data *its_node;
> +	struct msi_domain_info *info;
> +	struct irq_domain *d;
> +	u32 cr0, cr1;
> +	bool enabled;
> +	int ret;
> +
> +	info = kzalloc(sizeof(*info), GFP_KERNEL);
> +	if (!info)
> +		return -ENOMEM;
> +
> +	its_node = kzalloc(sizeof(*its_node), GFP_KERNEL);
> +	if (!its_node) {
> +		kfree(info);
> +		return -ENOMEM;
> +	}
> +
> +	info->ops = &its_msi_domain_ops;
> +	info->data = its_node;
> +
> +	mutex_init(&its_node->dev_alloc_lock);
> +	xa_init(&its_node->its_devices);
> +	its_node->fwnode = handle;
> +	its_node->its_base = its_base;
> +	its_node->its_trans_phys_base = its_trans_base;
> +
> +	d = irq_domain_create_hierarchy(parent_domain, IRQ_DOMAIN_FLAG_ISOLATED_MSI,
> +					0, handle, &gicv5_its_irq_domain_ops, info);
> +	its_node->domain = d;
> +	irq_domain_update_bus_token(its_node->domain, DOMAIN_BUS_NEXUS);
> +
> +	its_node->domain->msi_parent_ops = &gic_its_msi_parent_ops;
> +	its_node->domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
> +
> +	cr0 = its_readl_relaxed(its_node, GICV5_ITS_CR0);
> +	enabled = FIELD_GET(GICV5_ITS_CR0_ITSEN, cr0);
> +	if (WARN(enabled, "ITS %s enabled, disabling it before proceeding\n",
> +		 np->full_name)) {
> +		cr0 = FIELD_PREP(GICV5_ITS_CR0_ITSEN, 0x0);
> +		its_writel_relaxed(its_node, cr0, GICV5_ITS_CR0);
> +		ret = gicv5_its_wait_for_cr0(its_node);
> +		if (ret) {
> +			irq_domain_remove(its_node->domain);
> +			kfree(info);
> +			kfree(its_node);
> +			return ret;
> +		}
> +	}
> +
> +	if (of_property_read_bool(np, "dma-noncoherent")) {
> +		/*
> +		 * A non-coherent ITS implies that some cache levels cannot be
> +		 * used coherently by the cores and GIC. Our only option is to mark
> +		 * memory attributes for the GIC as non-cacheable; by default,
> +		 * non-cacheable memory attributes imply outer-shareable
> +		 * shareability, the value written into ITS_CR1_SH is ignored.
> +		 */
> +		cr1 = FIELD_PREP(GICV5_ITS_CR1_ITT_RA, GICV5_NO_READ_ALLOC)	|
> +		      FIELD_PREP(GICV5_ITS_CR1_DT_RA, GICV5_NO_READ_ALLOC)	|
> +		      FIELD_PREP(GICV5_ITS_CR1_IC, GICV5_NON_CACHE)		|
> +		      FIELD_PREP(GICV5_ITS_CR1_OC, GICV5_NON_CACHE);
> +		its_node->flags |= ITS_FLAGS_NON_COHERENT;
> +	} else {
> +		cr1 = FIELD_PREP(GICV5_ITS_CR1_ITT_RA, GICV5_READ_ALLOC)	|
> +		      FIELD_PREP(GICV5_ITS_CR1_DT_RA, GICV5_READ_ALLOC)		|
> +		      FIELD_PREP(GICV5_ITS_CR1_IC, GICV5_WB_CACHE)		|
> +		      FIELD_PREP(GICV5_ITS_CR1_OC, GICV5_WB_CACHE)		|
> +		      FIELD_PREP(GICV5_ITS_CR1_SH, GICV5_INNER_SHARE);
> +	}
> +
> +	its_writel_relaxed(its_node, cr1, GICV5_ITS_CR1);
> +
> +	ret = gicv5_its_init_devtab(its_node);
> +	if (ret) {
> +		irq_domain_remove(its_node->domain);
> +		kfree(info);
> +		kfree(its_node);
> +		return ret;
> +	}
> +
> +	cr0 = FIELD_PREP(GICV5_ITS_CR0_ITSEN, 0x1);
> +	its_writel_relaxed(its_node, cr0, GICV5_ITS_CR0);
> +
> +	ret = gicv5_its_wait_for_cr0(its_node);
> +	if (ret) {
> +		irq_domain_remove(its_node->domain);
> +		kfree(info);
> +		kfree(its_node);

Either convert to cleanup.h or do this in a common error label(s).

> +		return ret;
> +	}
> +
> +	list_add(&its_node->entry, &its_nodes);
> +
> +	gicv5_its_print_info(its_node);
> +
> +	return 0;
> +}
...

> diff --git a/drivers/irqchip/irq-gic-v5.c b/drivers/irqchip/irq-gic-v5.c
> index c4d4e85382f672fa4ae334db1a4e4c7c4f46b9fe..e483d0774936035b5cf2407da9a65d776bad3138 100644
> --- a/drivers/irqchip/irq-gic-v5.c
> +++ b/drivers/irqchip/irq-gic-v5.c
...
> @@ -168,17 +271,90 @@ struct gicv5_irs_chip_data {
>   
>   void __init gicv5_init_lpi_domain(void);
>   void __init gicv5_free_lpi_domain(void);
> +static inline int gicv5_wait_for_op(void __iomem *addr, u32 offset, u32 mask,
> +				    u32 *val)
> +{
> +	void __iomem *reg = addr + offset;
> +	u32 tmp;
> +	int ret;
> +
> +	ret = readl_poll_timeout_atomic(reg, tmp, tmp & mask, 1, 10 * USEC_PER_MSEC);

Does this have to be atomic? The call chain is complex, I haven't 
managed to check...

> +
> +	if (val)
> +		*val = tmp;

Do you really want to write val in case of timeout? Sounds unexpected.

> +	return ret;
> +}

thanks,
-- 
js
suse labs