Message-ID: <6f50a325344218167a9cef12d82bccc7cded3b52.camel@infradead.org>
Date: Tue, 14 Dec 2021 21:27:44 +0000
From: David Woodhouse <dwmw2@...radead.org>
To: Sean Christopherson <seanjc@...gle.com>
Cc: Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
"H . Peter Anvin" <hpa@...or.com>,
Paolo Bonzini <pbonzini@...hat.com>,
"Paul E . McKenney" <paulmck@...nel.org>,
linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
rcu@...r.kernel.org, mimoja@...oja.de, hewenliang4@...wei.com,
hushiyuan@...wei.com, luolongjun@...wei.com, hejingxian@...wei.com
Subject: Re: [PATCH v2 1/7] x86/apic/x2apic: Fix parallel handling of
cluster_mask
On Tue, 2021-12-14 at 17:10 +0000, Sean Christopherson wrote:
> On Tue, Dec 14, 2021, David Woodhouse wrote:
> > -static int alloc_clustermask(unsigned int cpu, int node)
> > +static int alloc_clustermask(unsigned int cpu, u32 cluster, int node)
> >  {
> > +	struct cluster_mask *cmsk = NULL;
> > +	unsigned int cpu_i;
> > +	u32 apicid;
> > +
> >  	if (per_cpu(cluster_masks, cpu))
> >  		return 0;
> > -	/*
> > -	 * If a hotplug spare mask exists, check whether it's on the right
> > -	 * node. If not, free it and allocate a new one.
> > +
> > +	/* For the hotplug case, don't always allocate a new one */
> > +	for_each_present_cpu(cpu_i) {
> > +		apicid = apic->cpu_present_to_apicid(cpu_i);
> > +		if (apicid != BAD_APICID && apicid >> 4 == cluster) {
> > +			cmsk = per_cpu(cluster_masks, cpu_i);
> > +			if (cmsk)
> > +				break;
> > +		}
> > +	}
> > +	if (!cmsk) {
> > +		cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
> > +	}
> > +	if (!cmsk)
> > +		return -ENOMEM;
>
> This can be,
>
> 	if (!cmsk) {
> 		cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
> 		if (!cmsk)
> 			return -ENOMEM;
> 	}
> 
> which IMO is more intuitive, and it also "fixes" the unnecessary braces in the
> initial check.
>
> > +
> > +	cmsk->node = node;
> > +	cmsk->clusterid = cluster;
> > +
> > +	per_cpu(cluster_masks, cpu) = cmsk;
> > +
> > +	/*
> > +	 * As an optimisation during boot, set the cluster_mask for *all*
> > +	 * present CPUs at once, to prevent *each* of them having to iterate
> > +	 * over the others to find the existing cluster_mask.
> >  	 */
> > -	if (cluster_hotplug_mask) {
> > -		if (cluster_hotplug_mask->node == node)
> > -			return 0;
> > -		kfree(cluster_hotplug_mask);
> > +	if (system_state < SYSTEM_RUNNING) {
>
> This can be
>
> 	if (system_state >= SYSTEM_RUNNING)
> 		return 0;
>
> to reduce indentation below.
>
> > +		for_each_present_cpu(cpu) {
>
> Reusing @cpu here is all kinds of confusing.
>
> > +			u32 apicid = apic->cpu_present_to_apicid(cpu);
>
> This shadows apicid. That's completely unnecessary.
>
> > +			if (apicid != BAD_APICID && apicid >> 4 == cluster) {
>
> A helper for retrieving the cluster from a cpu would dedup at least three instances
> of this pattern.
Thanks. Let's just lift the whole thing out into a separate function.
And actually, now I come to stare harder at it, the whole of struct
cluster_mask can go away too: with the cluster ID derived from the APIC
ID and the node only needed at allocation time, all that's left in it
is a plain cpumask.
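The helper Sean asked for, extracting the cluster from an APIC ID, is a
one-liner. As a minimal sketch (the patch below spells it as a macro;
a typed inline function would do equally well):

	/*
	 * In x2APIC mode the cluster ID is the physical APIC ID shifted
	 * right by 4; each cluster holds up to 16 CPUs.
	 */
	static inline u32 apic_cluster(u32 apicid)
	{
		return apicid >> 4;
	}

Looks a bit more like this now: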
---
 arch/x86/kernel/apic/x2apic_cluster.c | 108 +++++++++++++++-----------
 1 file changed, 62 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index e696e22d0531..e116dfaf5922 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -9,11 +9,7 @@
 
 #include "local.h"
 
-struct cluster_mask {
-	unsigned int	clusterid;
-	int		node;
-	struct cpumask	mask;
-};
+#define apic_cluster(apicid) ((apicid) >> 4)
 
 /*
  * __x2apic_send_IPI_mask() possibly needs to read
@@ -23,8 +19,7 @@ struct cluster_mask {
 static u32 *x86_cpu_to_logical_apicid __read_mostly;
 
 static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
-static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks);
-static struct cluster_mask *cluster_hotplug_mask;
+static DEFINE_PER_CPU_READ_MOSTLY(struct cpumask *, cluster_masks);
 
 static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
@@ -60,10 +55,10 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 
 	/* Collapse cpus in a cluster so a single IPI per cluster is sent */
 	for_each_cpu(cpu, tmpmsk) {
-		struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu);
+		struct cpumask *cmsk = per_cpu(cluster_masks, cpu);
 
 		dest = 0;
-		for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
+		for_each_cpu_and(clustercpu, tmpmsk, cmsk)
 			dest |= x86_cpu_to_logical_apicid[clustercpu];
 
 		if (!dest)
@@ -71,7 +66,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 		__x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
 
 		/* Remove cluster CPUs from tmpmask */
-		cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask);
+		cpumask_andnot(tmpmsk, tmpmsk, cmsk);
 	}
 
 	local_irq_restore(flags);
@@ -105,55 +100,76 @@ static u32 x2apic_calc_apicid(unsigned int cpu)
 
 static void init_x2apic_ldr(void)
 {
-	struct cluster_mask *cmsk = this_cpu_read(cluster_masks);
-	u32 cluster, apicid = apic_read(APIC_LDR);
-	unsigned int cpu;
+	struct cpumask *cmsk = this_cpu_read(cluster_masks);
 
-	x86_cpu_to_logical_apicid[smp_processor_id()] = apicid;
+	BUG_ON(!cmsk);
 
-	if (cmsk)
-		goto update;
-
-	cluster = apicid >> 16;
-	for_each_online_cpu(cpu) {
-		cmsk = per_cpu(cluster_masks, cpu);
-		/* Matching cluster found. Link and update it. */
-		if (cmsk && cmsk->clusterid == cluster)
-			goto update;
+	cpumask_set_cpu(smp_processor_id(), cmsk);
+}
+
+/*
+ * As an optimisation during boot, set the cluster_mask for *all*
+ * present CPUs at once, to prevent *each* of them having to iterate
+ * over the others to find the existing cluster_mask.
+ */
+static void prefill_clustermask(struct cpumask *cmsk, u32 cluster)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu) {
+		u32 apicid = apic->cpu_present_to_apicid(cpu);
+		if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) {
+			struct cpumask **cpu_cmsk = &per_cpu(cluster_masks, cpu);
+
+			BUG_ON(*cpu_cmsk && *cpu_cmsk != cmsk);
+			*cpu_cmsk = cmsk;
+		}
 	}
-	cmsk = cluster_hotplug_mask;
-	cmsk->clusterid = cluster;
-	cluster_hotplug_mask = NULL;
-update:
-	this_cpu_write(cluster_masks, cmsk);
-	cpumask_set_cpu(smp_processor_id(), &cmsk->mask);
 }
 
-static int alloc_clustermask(unsigned int cpu, int node)
+static int alloc_clustermask(unsigned int cpu, u32 cluster, int node)
 {
+	struct cpumask *cmsk = NULL;
+	unsigned int cpu_i;
+	u32 apicid;
+
 	if (per_cpu(cluster_masks, cpu))
 		return 0;
-	/*
-	 * If a hotplug spare mask exists, check whether it's on the right
-	 * node. If not, free it and allocate a new one.
-	 */
-	if (cluster_hotplug_mask) {
-		if (cluster_hotplug_mask->node == node)
-			return 0;
-		kfree(cluster_hotplug_mask);
+
+	/* For the hotplug case, don't always allocate a new one */
+	if (system_state >= SYSTEM_RUNNING) {
+		for_each_present_cpu(cpu_i) {
+			apicid = apic->cpu_present_to_apicid(cpu_i);
+			if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) {
+				cmsk = per_cpu(cluster_masks, cpu_i);
+				if (cmsk)
+					break;
+			}
+		}
+	}
+	if (!cmsk) {
+		cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
+		if (!cmsk)
+			return -ENOMEM;
 	}
-	cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask),
-					    GFP_KERNEL, node);
-	if (!cluster_hotplug_mask)
-		return -ENOMEM;
-	cluster_hotplug_mask->node = node;
+
+	per_cpu(cluster_masks, cpu) = cmsk;
+
+	if (system_state < SYSTEM_RUNNING)
+		prefill_clustermask(cmsk, cluster);
+
 	return 0;
 }
 
 static int x2apic_prepare_cpu(unsigned int cpu)
 {
-	if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0)
+	u32 phys_apicid = apic->cpu_present_to_apicid(cpu);
+	u32 cluster = apic_cluster(phys_apicid);
+	u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf));
+
+	x86_cpu_to_logical_apicid[cpu] = logical_apicid;
+
+	if (alloc_clustermask(cpu, cluster, cpu_to_node(cpu)) < 0)
 		return -ENOMEM;
 	if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL))
 		return -ENOMEM;
@@ -162,10 +178,10 @@ static int x2apic_prepare_cpu(unsigned int cpu)
 
 static int x2apic_dead_cpu(unsigned int dead_cpu)
 {
-	struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
+	struct cpumask *cmsk = per_cpu(cluster_masks, dead_cpu);
 
 	if (cmsk)
-		cpumask_clear_cpu(dead_cpu, &cmsk->mask);
+		cpumask_clear_cpu(dead_cpu, cmsk);
 	free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
 	return 0;
 }
--
2.31.1
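
For reference, the logical APIC ID that x2apic_prepare_cpu() computes
above is just the architectural x2APIC derivation from the physical
APIC ID. Worked through for a hypothetical CPU with physical APIC ID
0x23:

	cluster             = 0x23 >> 4                = 0x2
	position in cluster = 0x23 & 0xf               = 0x3
	logical APIC ID     = (0x2 << 16) | (1 << 0x3) = 0x20008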