[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2c367508-f81b-342e-eb05-8bbd1b056279@huawei.com>
Date: Mon, 16 Mar 2020 13:02:48 +0000
From: John Garry <john.garry@...wei.com>
To: Marc Zyngier <maz@...nel.org>, <linux-kernel@...r.kernel.org>,
<linux-arm-kernel@...ts.infradead.org>
CC: chenxiang <chenxiang66@...ilicon.com>,
Zhou Wang <wangzhou1@...ilicon.com>,
Ming Lei <ming.lei@...hat.com>,
Jason Cooper <jason@...edaemon.net>,
Thomas Gleixner <tglx@...utronix.de>,
"luojiaxing@...wei.com" <luojiaxing@...wei.com>
Subject: Re: [PATCH v3 2/2] irqchip/gic-v3-its: Balance initial LPI affinity
across CPUs
On 16/03/2020 11:54, Marc Zyngier wrote:
> When mapping an LPI, the ITS driver picks the first possible
> affinity, which is in most cases CPU0, assuming that if
> that's not suitable, someone will come and set the affinity
> to something more interesting.
>
> It apparently isn't the case, and people complain of poor
> performance when many interrupts are glued to the same CPU.
> So let's place the interrupts by finding the "least loaded"
> CPU (that is, the one that has the fewest LPIs mapped to it).
> So-called 'managed' interrupts are an interesting case where
> the affinity is actually dictated by the kernel itself, and
> we should honor this.
>
> Reported-by: John Garry <john.garry@...wei.com>
> Link: https://lore.kernel.org/r/1575642904-58295-1-git-send-email-john.garry@huawei.com
> Signed-off-by: Marc Zyngier <maz@...nel.org>
> ---
> drivers/irqchip/irq-gic-v3-its.c | 118 ++++++++++++++++++++++++-------
> 1 file changed, 92 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index 941786e1e8f7..7f1b731c04bb 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -1531,31 +1531,107 @@ static void its_dec_lpi_count(struct irq_data *d, int cpu)
> atomic_dec(&per_cpu_ptr(&cpu_lpi_count, cpu)->unmanaged);
> }
>
> +static unsigned int cpumask_pick_least_loaded(struct irq_data *d,
> + const struct cpumask *cpu_mask)
> +{
> + unsigned int cpu = nr_cpu_ids, tmp;
> + int count = S32_MAX;
> +
> + for_each_cpu(tmp, cpu_mask) {
Hi Marc,
> + int this_count = its_read_lpi_count(d, tmp);
Not sure if it's intentional, but there now seems to be a subtle
difference from what Thomas described for non-managed interrupts: for
those, x86 selects the CPU based on the total interrupt load per CPU
(or, more specifically, the lowest vector allocation count), and not
just the non-managed load. Or maybe I misread it.
Anyway, we can test this now for NVMe with its managed interrupts.
Cheers,
John
> + if (this_count < count) {
> + cpu = tmp;
> + count = this_count;
> + }
> + }
> +
> + return cpu;
> +}
> +
> +/*
> + * As suggested by Thomas Gleixner in:
> + * https://lore.kernel.org/r/87h80q2aoc.fsf@nanos.tec.linutronix.de
> + */
> +static int its_select_cpu(struct irq_data *d,
> + const struct cpumask *aff_mask)
> +{
> + struct its_device *its_dev = irq_data_get_irq_chip_data(d);
> + cpumask_var_t tmpmask;
> + int cpu, node;
> +
> + if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
> + return -ENOMEM;
> +
> + node = its_dev->its->numa_node;
> +
> + if (!irqd_affinity_is_managed(d)) {
> + /* First try the NUMA node */
> + if (node != NUMA_NO_NODE) {
> + /*
> + * Try the intersection of the affinity mask and the
> + * node mask (and the online mask, just to be safe).
> + */
> + cpumask_and(tmpmask, cpumask_of_node(node), aff_mask);
> + cpumask_and(tmpmask, tmpmask, cpu_online_mask);
> +
> + /* If that doesn't work, try the nodemask itself */
> + if (cpumask_empty(tmpmask))
> + cpumask_and(tmpmask, cpumask_of_node(node), cpu_online_mask);
> +
> + cpu = cpumask_pick_least_loaded(d, tmpmask);
> + if (cpu < nr_cpu_ids)
> + goto out;
> +
> + /* If we can't cross sockets, give up */
> + if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144))
> + goto out;
> +
> + /* If the above failed, expand the search */
> + }
> +
> + /* Try the intersection of the affinity and online masks */
> + cpumask_and(tmpmask, aff_mask, cpu_online_mask);
> +
> + /* If that doesn't fly, the online mask is the last resort */
> + if (cpumask_empty(tmpmask))
> + cpumask_copy(tmpmask, cpu_online_mask);
> +
> + cpu = cpumask_pick_least_loaded(d, tmpmask);
> + } else {
> + cpumask_and(tmpmask, irq_data_get_affinity_mask(d), cpu_online_mask);
> +
> + /* If we cannot cross sockets, limit the search to that node */
> + if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) &&
> + node != NUMA_NO_NODE)
> + cpumask_and(tmpmask, tmpmask, cpumask_of_node(node));
> +
> + cpu = cpumask_pick_least_loaded(d, tmpmask);
> + }
> +out:
> + free_cpumask_var(tmpmask);
> +
> + pr_debug("IRQ%d -> %*pbl CPU%d\n", d->irq, cpumask_pr_args(aff_mask), cpu);
> + return cpu;
> +}
> +
> static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
> bool force)
> {
> - unsigned int cpu;
> - const struct cpumask *cpu_mask = cpu_online_mask;
> struct its_device *its_dev = irq_data_get_irq_chip_data(d);
> struct its_collection *target_col;
> u32 id = its_get_event_id(d);
> + int cpu;
>
> /* A forwarded interrupt should use irq_set_vcpu_affinity */
> if (irqd_is_forwarded_to_vcpu(d))
> return -EINVAL;
>
> - /* lpi cannot be routed to a redistributor that is on a foreign node */
> - if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) {
> - if (its_dev->its->numa_node >= 0) {
> - cpu_mask = cpumask_of_node(its_dev->its->numa_node);
> - if (!cpumask_intersects(mask_val, cpu_mask))
> - return -EINVAL;
> - }
> - }
> -
> - cpu = cpumask_any_and(mask_val, cpu_mask);
> + if (!force)
> + cpu = its_select_cpu(d, mask_val);
> + else
> + cpu = cpumask_pick_least_loaded(d, mask_val);
>
> - if (cpu >= nr_cpu_ids)
> + if (cpu < 0 || cpu >= nr_cpu_ids)
> return -EINVAL;
>
> /* don't set the affinity when the target cpu is same as current one */
> @@ -3455,21 +3531,11 @@ static int its_irq_domain_activate(struct irq_domain *domain,
> {
> struct its_device *its_dev = irq_data_get_irq_chip_data(d);
> u32 event = its_get_event_id(d);
> - const struct cpumask *cpu_mask = cpu_online_mask;
> int cpu;
>
> - /* get the cpu_mask of local node */
> - if (its_dev->its->numa_node >= 0)
> - cpu_mask = cpumask_of_node(its_dev->its->numa_node);
> -
> - /* Bind the LPI to the first possible CPU */
> - cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
> - if (cpu >= nr_cpu_ids) {
> - if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
> - return -EINVAL;
> -
> - cpu = cpumask_first(cpu_online_mask);
> - }
> + cpu = its_select_cpu(d, cpu_online_mask);
> + if (cpu < 0 || cpu >= nr_cpu_ids)
> + return -EINVAL;
>
> its_inc_lpi_count(d, cpu);
> its_dev->event_map.col_map[event] = cpu;
>
Powered by blists - more mailing lists