linux-kernel - Re: [PATCH 1/2] irqchip: RISC-V Local Interrupt Controller Driver

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 28 Jun 2017 22:47:40 +0200 (CEST)
From:   Thomas Gleixner <tglx@...utronix.de>
To:     Palmer Dabbelt <palmer@...belt.com>
cc:     linux-kernel@...r.kernel.org
Subject: Re: [PATCH 1/2] irqchip: RISC-V Local Interrupt Controller Driver

On Mon, 26 Jun 2017, Palmer Dabbelt wrote:
> +DEFINE_PER_CPU(struct riscv_irq_data, riscv_irq_data);
> +DEFINE_PER_CPU(atomic_long_t, riscv_early_sie);
> +
> +static void riscv_software_interrupt(void)
> +{
> +#ifdef CONFIG_SMP
> +	irqreturn_t ret;
> +
> +	ret = handle_ipi();
> +	if (ret != IRQ_NONE)
> +		return;
> +#endif
> +
> +	BUG();

Are you sure you want to crash the system just because a spurious interrupt
happened?

> +}
> +
> +asmlinkage void __irq_entry do_IRQ(unsigned int cause, struct pt_regs *regs)
> +{
> +	struct pt_regs *old_regs = set_irq_regs(regs);
> +	struct irq_domain *domain;
> +
> +	irq_enter();
> +
> +	/* There are three classes of interrupt: timer, software, and

Please use proper multiline comment formatting:

        /*
	 * bla.....
	 * foo.....
	 */

> +	 * external devices.  We dispatch between them here.  External
> +	 * device interrupts use the generic IRQ mechanisms.
> +	 */
> +	switch (cause) {
> +	case INTERRUPT_CAUSE_TIMER:
> +		riscv_timer_interrupt();
> +		break;
> +	case INTERRUPT_CAUSE_SOFTWARE:
> +		riscv_software_interrupt();
> +		break;
> +	default:
> +		domain = per_cpu(riscv_irq_data, smp_processor_id()).domain;
> +		generic_handle_irq(irq_find_mapping(domain, cause));
> +		break;
> +	}
> +
> +	irq_exit();
> +	set_irq_regs(old_regs);
> +}
> +
> +static int riscv_irqdomain_map(struct irq_domain *d, unsigned int irq,
> +			       irq_hw_number_t hwirq)
> +{
> +	struct riscv_irq_data *data = d->host_data;
> +
> +	irq_set_chip_and_handler(irq, &data->chip, handle_simple_irq);
> +	irq_set_chip_data(irq, data);
> +	irq_set_noprobe(irq);
> +
> +	return 0;
> +}
> +
> +static const struct irq_domain_ops riscv_irqdomain_ops = {
> +	.map	= riscv_irqdomain_map,
> +	.xlate	= irq_domain_xlate_onecell,
> +};
> +
> +static void riscv_irq_mask(struct irq_data *d)
> +{
> +	struct riscv_irq_data *data = irq_data_get_irq_chip_data(d);
> +
> +	BUG_ON(smp_processor_id() != data->hart);

Crashing the machine is the last resort if there is no chance to handle a
situation gracefully. Something like

	if (WARN_ON_ONCE(smp_processor_id() != data->hart))
		do_something_sensible();
	else
		.....

might at least keep the machine halfways functional for debugging.

> +	csr_clear(sie, 1 << (long)d->hwirq);
> +}
> +
> +static void riscv_irq_unmask(struct irq_data *d)
> +{
> +	struct riscv_irq_data *data = irq_data_get_irq_chip_data(d);
> +
> +	BUG_ON(smp_processor_id() != data->hart);
> +	csr_set(sie, 1 << (long)d->hwirq);
> +}
> +
> +static void riscv_irq_enable_helper(void *d)
> +{
> +	riscv_irq_unmask(d);
> +}
> +
> +static void riscv_irq_enable(struct irq_data *d)
> +{
> +	struct riscv_irq_data *data = irq_data_get_irq_chip_data(d);
> +
> +	/* There are two phases to setting up an interrupt: first we set a bit
> +	 * in this bookkeeping structure, which is used by trap_init to
> +	 * initialize SIE for each hart as it comes up.

And what exactly has this to do with irq_enable()? Why would you call that
for an interrupt which solely goes to a offline cpu?

> +static void riscv_irq_disable(struct irq_data *d)
> +{
> +	struct riscv_irq_data *data = irq_data_get_irq_chip_data(d);
> +
> +	/* This is the mirror of riscv_irq_enable. */
> +	atomic_long_and(~(1 << (long)d->hwirq),
> +			&per_cpu(riscv_early_sie, data->hart));
> +	if (data->hart == smp_processor_id())
> +		riscv_irq_mask(d);
> +	else if (cpu_online(data->hart))
> +		smp_call_function_single(data->hart,
> +					 riscv_irq_disable_helper,
> +					 d,
> +					 true);

Same question as above.

> +}
> +
> +static void riscv_irq_mask_noop(struct irq_data *d) { }
> +
> +static void riscv_irq_unmask_noop(struct irq_data *d) { }
> 
> +static void riscv_irq_enable_noop(struct irq_data *d)
> +{
> +	struct device_node *data = irq_data_get_irq_chip_data(d);
> +	u32 hart;
> +
> +	if (!of_property_read_u32(data, "reg", &hart))
> +		printk(
> +		  KERN_WARNING "enabled interrupt %d for missing hart %d (this interrupt has no handler)\n",

Has no handler? I really have a hard time to understand the logic here.

> +		  (int)d->hwirq, hart);
> +}
> +
> +static struct irq_chip riscv_noop_chip = {
> +	.name = "riscv,cpu-intc,noop",
> +	.irq_mask = riscv_irq_mask_noop,
> +	.irq_unmask = riscv_irq_unmask_noop,
> +	.irq_enable = riscv_irq_enable_noop,

Please write that in tabular fashion:

	.name		= "riscv,cpu-intc,noop",
	.irq_mask	= riscv_irq_mask_noop,

> +};
> +
> +static int riscv_irqdomain_map_noop(struct irq_domain *d, unsigned int irq,
> +				    irq_hw_number_t hwirq)
> +{
> +	struct device_node *data = d->host_data;
> +
> +	irq_set_chip_and_handler(irq, &riscv_noop_chip, handle_simple_irq);
> +	irq_set_chip_data(irq, data);
> +	return 0;
> +}
> +
> +static const struct irq_domain_ops riscv_irqdomain_ops_noop = {
> +	.map    = riscv_irqdomain_map_noop,
> +	.xlate  = irq_domain_xlate_onecell,
> +};
> +
> +static int riscv_intc_init(struct device_node *node, struct device_node *parent)
> +{
> +	int hart;
> +	struct riscv_irq_data *data;
> +
> +	if (parent)
> +		return 0;
> +
> +	hart = riscv_of_processor_hart(node->parent);
> +	if (hart < 0) {
> +		/* If a hart is disabled, create a no-op irq domain.  Devices
> +		 * may still have interrupts connected to those harts.  This is
> +		 * not wrong... unless they actually load a driver that needs
> +		 * it!
> +		 */
> +		irq_domain_add_linear(
> +			node,
> +			8*sizeof(uintptr_t),
> +			&riscv_irqdomain_ops_noop,
> +			node->parent);
> +		return 0;

I have a hard time to understand the logic here. Why do you need an
interrupt domain for something which does not exist? That does not make any
sense.

> +	}
> +
> +	data = &per_cpu(riscv_irq_data, hart);
> +	snprintf(data->name, sizeof(data->name), "riscv,cpu_intc,%d", hart);
> +	data->hart = hart;
> +	data->chip.name = data->name;
> +	data->chip.irq_mask = riscv_irq_mask;
> +	data->chip.irq_unmask = riscv_irq_unmask;
> +	data->chip.irq_enable = riscv_irq_enable;
> +	data->chip.irq_disable = riscv_irq_disable;
> +	data->domain = irq_domain_add_linear(
> +		node,
> +		8*sizeof(uintptr_t),
> +		&riscv_irqdomain_ops,
> +		data);
> +	WARN_ON(!data->domain);

That warn_on is great. You warn and then continue as if nothing
happened. Machine will crash later dereferencing a NULL pointer.

Some more epxlanations about the inner workings of all of this would be
appreciated.

Thanks,

	tglx