[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241020121819.GAZxT1CyR_5vLLZ5e6@fat_crate.local>
Date: Sun, 20 Oct 2024 14:18:19 +0200
From: Borislav Petkov <bp@...en8.de>
To: Jens Axboe <axboe@...nel.dk>
Cc: Thomas Gleixner <tglx@...utronix.de>,
the arch/x86 maintainers <x86@...nel.org>,
LKML <linux-kernel@...r.kernel.org>,
"regressions@...ts.linux.dev" <regressions@...ts.linux.dev>
Subject: Re: AMD zen microcode updates breaks boot
On Sat, Oct 19, 2024 at 09:24:24PM -0600, Jens Axboe wrote:
> This was my initial thought when I saw where it crashed, is this being
> run before node masks are initialized?
Yap, looka here:
...
[ 14.195542] #253
[ 13.435208] microcode: cache_find_patch: equiv_cpu: 0x0, patch_id: 0xaa00009
[ 13.435208] microcode: ucode_rev_to_cpuid: val: 0xaa00009, p.stepping: 0x0, c.stepping: 0x0
[ 13.435208] microcode: ucode_rev_to_cpuid: val: 0xaa00009, p.stepping: 0x0, c.stepping: 0x0
[ 13.435208] microcode: patch_cpus_equivalent: p_cid.full: 0xaa0f00, n_cid.full: 0xaa0f00
[ 13.435208] microcode: cache_find_patch: using 0xaa00009
[ 14.203190] #254
[ 13.435208] microcode: ucode_rev_to_cpuid: val: 0xaa00009, p.stepping: 0x0, c.stepping: 0x0
[ 13.435208] microcode: patch_cpus_equivalent: p_cid.full: 0xaa0f00, n_cid.full: 0xaa0f00
[ 13.435208] microcode: cache_find_patch: using 0xaa00009
[ 13.435208] microcode: cache_find_patch: equiv_cpu: 0x0, patch_id: 0xaa00009
[ 14.223125] #255
[ 13.435208] microcode: ucode_rev_to_cpuid: val: 0xaa00009, p.stepping: 0x0, c.stepping: 0x0
[ 13.435208] microcode: cache_find_patch: equiv_cpu: 0x0, patch_id: 0xaa00009
<--- Patching on the first node ends here and now it starts fixing up the
NUMA masks:
[ 13.435153] numa_add_cpu cpu 1 node 0: mask now 0-1
[ 13.435153] numa_add_cpu cpu 2 node 0: mask now 0-2
[ 13.435153] numa_add_cpu cpu 3 node 0: mask now 0-3
[ 13.435208] microcode: ucode_rev_to_cpuid: val: 0xaa00009, p.stepping: 0x0, c.stepping: 0x0
[ 13.435153] numa_add_cpu cpu 4 node 0: mask now 0-4
[ 13.435154] numa_add_cpu cpu 5 node 0: mask now 0-5
[ 13.435208] microcode: ucode_rev_to_cpuid: val: 0xaa00009, p.stepping: 0x0, c.stepping: 0x0
[ 13.435154] numa_add_cpu cpu 6 node 0: mask now 0-6
[ 13.435208] microcode: patch_cpus_equivalent: p_cid.full: 0xaa0f00, n_cid.full: 0xaa0f00
[ 13.435154] numa_add_cpu cpu 7 node 0: mask now 0-7
[ 13.435154] numa_add_cpu cpu 8 node 1: mask now 8
[ 13.435154] numa_add_cpu cpu 9 node 1: mask now 8-9
...
so we're clearly too early.
Ok, this diff at the end should fix it completely. I think. The same dance,
replace, build and boot.
I've moved the iteration over the nodes in the late loading path as this is
where we need it only anyway. If you wanna test that too and you feel brave,
do this after the machine boots:
# echo 1 > /sys/devices/system/cpu/microcode/reload
It should say
bash: echo: write error: File descriptor in bad state
because you don't have newer microcode but there will be output in dmesg and
the nodes should look all correct. Like here:
dmesg | grep nid:
[ 156.770267] microcode: load_microcode_amd: nid: 0, cpu: 0, c ptr: 0xff11000c1f819020
[ 156.829217] microcode: load_microcode_amd: nid: 1, cpu: 8, c ptr: 0xff1100181f619020
[ 156.888810] microcode: load_microcode_amd: nid: 2, cpu: 16, c ptr: 0xff11006c1f619020
[ 156.950003] microcode: load_microcode_amd: nid: 3, cpu: 24, c ptr: 0xff1100781f619020
[ 157.011062] microcode: load_microcode_amd: nid: 4, cpu: 32, c ptr: 0xff11003c1f619020
[ 157.071976] microcode: load_microcode_amd: nid: 5, cpu: 40, c ptr: 0xff1100481f619020
[ 157.132870] microcode: load_microcode_amd: nid: 6, cpu: 48, c ptr: 0xff11009c1f619020
[ 157.193847] microcode: load_microcode_amd: nid: 7, cpu: 56, c ptr: 0xff1100a81f619020
[ 157.254828] microcode: load_microcode_amd: nid: 8, cpu: 64, c ptr: 0xff1100541f619020
[ 157.315782] microcode: load_microcode_amd: nid: 9, cpu: 72, c ptr: 0xff1100601f619020
[ 157.376709] microcode: load_microcode_amd: nid: 10, cpu: 80, c ptr: 0xff1100b41f619020
[ 157.437826] microcode: load_microcode_amd: nid: 11, cpu: 88, c ptr: 0xff1100c01f619020
[ 157.498932] microcode: load_microcode_amd: nid: 12, cpu: 96, c ptr: 0xff1100241f619020
[ 157.559974] microcode: load_microcode_amd: nid: 13, cpu: 104, c ptr: 0xff1100301f619020
[ 157.621153] microcode: load_microcode_amd: nid: 14, cpu: 112, c ptr: 0xff1100841f619020
[ 157.682331] microcode: load_microcode_amd: nid: 15, cpu: 120, c ptr: 0xff1100901f619020
[ 157.743529] microcode: load_microcode_amd: nid: 16, cpu: 128, c ptr: 0xff1101080f619020
[ 157.804669] microcode: load_microcode_amd: nid: 17, cpu: 136, c ptr: 0xff1101104f619020
[ 157.865863] microcode: load_microcode_amd: nid: 18, cpu: 144, c ptr: 0xff11014a0f619020
[ 157.927048] microcode: load_microcode_amd: nid: 19, cpu: 152, c ptr: 0xff1101524f619020
[ 157.988263] microcode: load_microcode_amd: nid: 20, cpu: 160, c ptr: 0xff1101290f619020
[ 158.049464] microcode: load_microcode_amd: nid: 21, cpu: 168, c ptr: 0xff1101314f619020
[ 158.110717] microcode: load_microcode_amd: nid: 22, cpu: 176, c ptr: 0xff11016b0f619020
[ 158.171976] microcode: load_microcode_amd: nid: 23, cpu: 184, c ptr: 0xff1101734f619020
[ 158.233168] microcode: load_microcode_amd: nid: 24, cpu: 192, c ptr: 0xff1101398f619020
[ 158.294369] microcode: load_microcode_amd: nid: 25, cpu: 200, c ptr: 0xff110141cf619020
[ 158.355615] microcode: load_microcode_amd: nid: 26, cpu: 208, c ptr: 0xff11017b8f619020
[ 158.416845] microcode: load_microcode_amd: nid: 27, cpu: 216, c ptr: 0xff110183c3019020
[ 158.478043] microcode: load_microcode_amd: nid: 28, cpu: 224, c ptr: 0xff1101188f619020
[ 158.539273] microcode: load_microcode_amd: nid: 29, cpu: 232, c ptr: 0xff110120cf619020
[ 158.600503] microcode: load_microcode_amd: nid: 30, cpu: 240, c ptr: 0xff11015a8f619020
[ 158.661735] microcode: load_microcode_amd: nid: 31, cpu: 248, c ptr: 0xff110162cf619020
Every node has 8 CPUs in it, as it should.
Anyway, full conglomerate diff below.
Thx.
---
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index f63b051f25a0..602fd382e0b5 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -158,6 +158,9 @@ static union cpuid_1_eax ucode_rev_to_cpuid(unsigned int val)
c.family = 0xf;
c.ext_fam = p.ext_fam;
+ pr_info("%s: val: 0x%x, p.stepping: 0x%x, c.stepping: 0x%x\n",
+ __func__, val, p.stepping, c.stepping);
+
return c;
}
@@ -584,7 +587,7 @@ void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_
native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy);
}
-static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
+static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size);
static int __init save_microcode_in_initrd(void)
{
@@ -605,7 +608,7 @@ static int __init save_microcode_in_initrd(void)
if (!desc.mc)
return -EINVAL;
- ret = load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
+ ret = _load_microcode_amd(x86_family(cpuid_1_eax), desc.data, desc.size);
if (ret > UCODE_UPDATED)
return -EINVAL;
@@ -613,16 +616,22 @@ static int __init save_microcode_in_initrd(void)
}
early_initcall(save_microcode_in_initrd);
-static inline bool patch_cpus_equivalent(struct ucode_patch *p, struct ucode_patch *n)
+static inline bool patch_cpus_equivalent(struct ucode_patch *p,
+ struct ucode_patch *n,
+ bool ignore_stepping)
{
/* Zen and newer hardcode the f/m/s in the patch ID */
if (x86_family(bsp_cpuid_1_eax) >= 0x17) {
union cpuid_1_eax p_cid = ucode_rev_to_cpuid(p->patch_id);
union cpuid_1_eax n_cid = ucode_rev_to_cpuid(n->patch_id);
- /* Zap stepping */
- p_cid.stepping = 0;
- n_cid.stepping = 0;
+ if (ignore_stepping) {
+ p_cid.stepping = 0;
+ n_cid.stepping = 0;
+ }
+
+ pr_info("%s: p_cid.full: 0x%x, n_cid.full: 0x%x\n",
+ __func__, p_cid.full, n_cid.full);
return p_cid.full == n_cid.full;
} else {
@@ -641,16 +650,22 @@ static struct ucode_patch *cache_find_patch(struct ucode_cpu_info *uci, u16 equi
n.equiv_cpu = equiv_cpu;
n.patch_id = uci->cpu_sig.rev;
+ pr_info("%s: equiv_cpu: 0x%x, patch_id: 0x%x\n",
+ __func__, equiv_cpu, uci->cpu_sig.rev);
+
WARN_ON_ONCE(!n.patch_id);
- list_for_each_entry(p, µcode_cache, plist)
- if (patch_cpus_equivalent(p, &n))
+ list_for_each_entry(p, µcode_cache, plist) {
+ if (patch_cpus_equivalent(p, &n, false)) {
+ pr_info("%s: using 0x%x\n", __func__, p->patch_id);
return p;
+ }
+ }
return NULL;
}
-static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
+static inline int patch_newer(struct ucode_patch *p, struct ucode_patch *n)
{
/* Zen and newer hardcode the f/m/s in the patch ID */
if (x86_family(bsp_cpuid_1_eax) >= 0x17) {
@@ -659,6 +674,9 @@ static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
zp.ucode_rev = p->patch_id;
zn.ucode_rev = n->patch_id;
+ if (zn.stepping != zp.stepping)
+ return -1;
+
return zn.rev > zp.rev;
} else {
return n->patch_id > p->patch_id;
@@ -668,22 +686,32 @@ static inline bool patch_newer(struct ucode_patch *p, struct ucode_patch *n)
static void update_cache(struct ucode_patch *new_patch)
{
struct ucode_patch *p;
+ int ret;
list_for_each_entry(p, µcode_cache, plist) {
- if (patch_cpus_equivalent(p, new_patch)) {
- if (!patch_newer(p, new_patch)) {
+ if (patch_cpus_equivalent(p, new_patch, true)) {
+ ret = patch_newer(p, new_patch);
+ if (ret < 0)
+ continue;
+ else if (!ret) {
/* we already have the latest patch */
kfree(new_patch->data);
kfree(new_patch);
return;
}
+ pr_info("%s: replace 0x%x with 0x%x\n",
+ __func__, p->patch_id, new_patch->patch_id);
+
list_replace(&p->plist, &new_patch->plist);
kfree(p->data);
kfree(p);
return;
}
}
+
+ pr_info("%s: add patch: 0x%x\n", __func__, new_patch->patch_id);
+
/* no patch found, add it */
list_add_tail(&new_patch->plist, µcode_cache);
}
@@ -944,30 +972,45 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
return UCODE_OK;
}
-static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
+static enum ucode_state _load_microcode_amd(u8 family, const u8 *data, size_t size)
{
- struct cpuinfo_x86 *c;
- unsigned int nid, cpu;
- struct ucode_patch *p;
enum ucode_state ret;
/* free old equiv table */
free_equiv_cpu_table();
ret = __load_microcode_amd(family, data, size);
- if (ret != UCODE_OK) {
+ if (ret != UCODE_OK)
cleanup();
+
+ return ret;
+}
+
+static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
+{
+ struct cpuinfo_x86 *c;
+ unsigned int nid, cpu;
+ struct ucode_patch *p;
+ enum ucode_state ret;
+
+ ret = _load_microcode_amd(family, data, size);
+ if (ret != UCODE_OK)
return ret;
- }
for_each_node(nid) {
cpu = cpumask_first(cpumask_of_node(nid));
c = &cpu_data(cpu);
+ pr_info("%s: nid: %d, cpu: %d, c ptr: 0x%px\n",
+ __func__, nid, cpu, c);
+
p = find_patch(cpu);
if (!p)
continue;
+ pr_info("%s: microcode: 0x%x, patch_id: 0x%x\n",
+ __func__, c->microcode, p->patch_id);
+
if (c->microcode >= p->patch_id)
continue;
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
Powered by blists - more mailing lists