Message-ID: <20170220125806.GA28416@gmail.com>
Date:   Mon, 20 Feb 2017 13:58:06 +0100
From:   Ingo Molnar <mingo@...nel.org>
To:     Linus Torvalds <torvalds@...ux-foundation.org>
Cc:     linux-kernel@...r.kernel.org, Borislav Petkov <bp@...en8.de>,
        Thomas Gleixner <tglx@...utronix.de>,
        "H. Peter Anvin" <hpa@...or.com>,
        Peter Zijlstra <a.p.zijlstra@...llo.nl>,
        Andrew Morton <akpm@...ux-foundation.org>
Subject: [GIT PULL] x86/microcode changes for v4.11

Linus,

Please pull the latest x86-microcode-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-microcode-for-linus

   # HEAD: f26483eaedec39b09b1f2bdfc3f0d18f86764327 Merge branch 'x86/urgent' into x86/microcode, to resolve conflicts

The main changes are further simplification and unification of the code between
the AMD and Intel microcode loaders, plus other cleanups - by Borislav Petkov.
(A short sketch of the new bare-minimum MSR accessor layering follows the
diffstat below.)

 Thanks,

	Ingo

------------------>
Borislav Petkov (15):
      x86/MSR: Carve out bare minimum accessors
      x86/microcode: Convert to bare minimum MSR accessors
      x86/microcode/AMD: Clean up find_equiv_id()
      x86/microcode/AMD: Shorten function parameter's name
      x86/microcode/AMD: Extend the container struct
      x86/microcode/AMD: Rework container parsing
      x86/microcode: Decrease CPUID use
      x86/microcode/AMD: Get rid of global this_equiv_id
      x86/microcode/AMD: Use find_microcode_in_initrd()
      x86/microcode: Remove local vendor variable
      x86/microcode/AMD: Check patch level only on the BSP
      x86/microcode/AMD: Unify load_ucode_amd_ap()
      x86/microcode/AMD: Simplify saving from initrd
      x86/microcode/AMD: Remove AP scanning optimization
      x86/microcode/AMD: Remove struct cont_desc.eq_id


 arch/x86/include/asm/apic.h          |   2 +-
 arch/x86/include/asm/microcode.h     |   9 +-
 arch/x86/include/asm/microcode_amd.h |   2 -
 arch/x86/include/asm/msr.h           |  51 ++--
 arch/x86/kernel/cpu/microcode/amd.c  | 501 +++++++++++------------------------
 arch/x86/kernel/cpu/microcode/core.c |  81 ++++--
 6 files changed, 249 insertions(+), 397 deletions(-)
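
To make the new layering concrete, a short sketch (illustrative only, not
part of the diff itself): __rdmsr() and __wrmsr() are the bare, notrace
primitives, native_read_msr() and native_write_msr() add the MSR tracepoints
on top of them, and the native_rdmsr()/native_wrmsr() macros used by the
early microcode loader expand directly to the bare primitives. The same bare
path now also serves the APIC EOI write, which previously went through the
ad-hoc wrmsr_notrace():

	u32 rev, dummy;

	/* Traced accessor, for regular kernel code: */
	rev = (u32)native_read_msr(MSR_AMD64_PATCH_LEVEL);

	/* Bare accessor, as used by the early loader; per the
	 * microcode.h hunk below, native_rdmsr() expands to roughly:
	 */
	u64 __val = __rdmsr(MSR_AMD64_PATCH_LEVEL);	/* no tracepoints */
	rev   = (u32)__val;
	dummy = (u32)(__val >> 32);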

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 0c5fbc68e82d..eff8e36aaf72 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -195,7 +195,7 @@ static inline void native_apic_msr_write(u32 reg, u32 v)
 
 static inline void native_apic_msr_eoi_write(u32 reg, u32 v)
 {
-	wrmsr_notrace(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
+	__wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
 }
 
 static inline u32 native_apic_msr_read(u32 reg)
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 2266f864b747..daadeeea00b1 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -7,18 +7,17 @@
 
 #define native_rdmsr(msr, val1, val2)			\
 do {							\
-	u64 __val = native_read_msr((msr));		\
+	u64 __val = __rdmsr((msr));			\
 	(void)((val1) = (u32)__val);			\
 	(void)((val2) = (u32)(__val >> 32));		\
 } while (0)
 
 #define native_wrmsr(msr, low, high)			\
-	native_write_msr(msr, low, high)
+	__wrmsr(msr, low, high)
 
 #define native_wrmsrl(msr, val)				\
-	native_write_msr((msr),				\
-			 (u32)((u64)(val)),		\
-			 (u32)((u64)(val) >> 32))
+	__wrmsr((msr), (u32)((u64)(val)),		\
+		       (u32)((u64)(val) >> 32))
 
 struct ucode_patch {
 	struct list_head plist;
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 3e3e20be829a..3d57009e168b 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -54,6 +54,4 @@ static inline int __init
 save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
 void reload_ucode_amd(void) {}
 #endif
-
-extern bool check_current_patch_level(u32 *rev, bool early);
 #endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index db0b90c3b03e..898dba2e2e2c 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -80,7 +80,14 @@ static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {}
 static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
 #endif
 
-static inline unsigned long long native_read_msr(unsigned int msr)
+/*
+ * __rdmsr() and __wrmsr() are the two primitives which are the bare minimum MSR
+ * accessors and should not have any tracing or other functionality piggybacking
+ * on them - those are *purely* for accessing MSRs and nothing more. So don't even
+ * think of extending them - you will be slapped with a stinking trout or a frozen
+ * shark will reach you, wherever you are! You've been warned.
+ */
+static inline unsigned long long notrace __rdmsr(unsigned int msr)
 {
 	DECLARE_ARGS(val, low, high);
 
@@ -88,11 +95,30 @@ static inline unsigned long long native_read_msr(unsigned int msr)
 		     "2:\n"
 		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
 		     : EAX_EDX_RET(val, low, high) : "c" (msr));
-	if (msr_tracepoint_active(__tracepoint_read_msr))
-		do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
+
 	return EAX_EDX_VAL(val, low, high);
 }
 
+static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high)
+{
+	asm volatile("1: wrmsr\n"
+		     "2:\n"
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
+		     : : "c" (msr), "a"(low), "d" (high) : "memory");
+}
+
+static inline unsigned long long native_read_msr(unsigned int msr)
+{
+	unsigned long long val;
+
+	val = __rdmsr(msr);
+
+	if (msr_tracepoint_active(__tracepoint_read_msr))
+		do_trace_read_msr(msr, val, 0);
+
+	return val;
+}
+
 static inline unsigned long long native_read_msr_safe(unsigned int msr,
 						      int *err)
 {
@@ -116,29 +142,14 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
 
 /* Can be uninlined because referenced by paravirt */
 static inline void notrace
-__native_write_msr_notrace(unsigned int msr, u32 low, u32 high)
-{
-	asm volatile("1: wrmsr\n"
-		     "2:\n"
-		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
-		     : : "c" (msr), "a"(low), "d" (high) : "memory");
-}
-
-/* Can be uninlined because referenced by paravirt */
-static inline void notrace
 native_write_msr(unsigned int msr, u32 low, u32 high)
 {
-	__native_write_msr_notrace(msr, low, high);
+	__wrmsr(msr, low, high);
+
 	if (msr_tracepoint_active(__tracepoint_write_msr))
 		do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
 }
 
-static inline void
-wrmsr_notrace(unsigned int msr, u32 low, u32 high)
-{
-	__native_write_msr_notrace(msr, low, high);
-}
-
 /* Can be uninlined because referenced by paravirt */
 static inline int notrace
 native_write_msr_safe(unsigned int msr, u32 low, u32 high)
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 079e81733a58..7889ae492af0 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -42,16 +42,19 @@ static struct equiv_cpu_entry *equiv_cpu_table;
 
 /*
  * This points to the current valid container of microcode patches which we will
- * save from the initrd/builtin before jettisoning its contents.
+ * save from the initrd/builtin before jettisoning its contents. @mc is the
+ * microcode patch we found to match.
  */
-struct container {
-	u8 *data;
-	size_t size;
-} cont;
+struct cont_desc {
+	struct microcode_amd *mc;
+	u32		     cpuid_1_eax;
+	u32		     psize;
+	u8		     *data;
+	size_t		     size;
+};
 
 static u32 ucode_new_rev;
 static u8 amd_ucode_patch[PATCH_MAX_SIZE];
-static u16 this_equiv_id;
 
 /*
  * Microcode patch container file is prepended to the initrd in cpio
@@ -60,57 +63,13 @@ static u16 this_equiv_id;
 static const char
 ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin";
 
-static size_t compute_container_size(u8 *data, u32 total_size)
+static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig)
 {
-	size_t size = 0;
-	u32 *header = (u32 *)data;
-
-	if (header[0] != UCODE_MAGIC ||
-	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
-	    header[2] == 0)                            /* size */
-		return size;
-
-	size = header[2] + CONTAINER_HDR_SZ;
-	total_size -= size;
-	data += size;
-
-	while (total_size) {
-		u16 patch_size;
-
-		header = (u32 *)data;
-
-		if (header[0] != UCODE_UCODE_TYPE)
-			break;
-
-		/*
-		 * Sanity-check patch size.
-		 */
-		patch_size = header[1];
-		if (patch_size > PATCH_MAX_SIZE)
-			break;
-
-		size	   += patch_size + SECTION_HDR_SIZE;
-		data	   += patch_size + SECTION_HDR_SIZE;
-		total_size -= patch_size + SECTION_HDR_SIZE;
+	for (; equiv_table && equiv_table->installed_cpu; equiv_table++) {
+		if (sig == equiv_table->installed_cpu)
+			return equiv_table->equiv_cpu;
 	}
 
-	return size;
-}
-
-static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
-				unsigned int sig)
-{
-	int i = 0;
-
-	if (!equiv_cpu_table)
-		return 0;
-
-	while (equiv_cpu_table[i].installed_cpu != 0) {
-		if (sig == equiv_cpu_table[i].installed_cpu)
-			return equiv_cpu_table[i].equiv_cpu;
-
-		i++;
-	}
 	return 0;
 }
 
@@ -118,91 +77,109 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
  * This scans the ucode blob for the proper container as we can have multiple
  * containers glued together. Returns the equivalence ID from the equivalence
  * table or 0 if none found.
+ * Returns the amount of bytes consumed while scanning. @desc contains all the
+ * data we're going to use in later stages of the application.
  */
-static u16
-find_proper_container(u8 *ucode, size_t size, struct container *ret_cont)
+static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc)
 {
-	struct container ret = { NULL, 0 };
-	u32 eax, ebx, ecx, edx;
 	struct equiv_cpu_entry *eq;
-	int offset, left;
-	u16 eq_id = 0;
-	u32 *header;
-	u8 *data;
+	ssize_t orig_size = size;
+	u32 *hdr = (u32 *)ucode;
+	u16 eq_id;
+	u8 *buf;
 
-	data   = ucode;
-	left   = size;
-	header = (u32 *)data;
+	/* Am I looking at an equivalence table header? */
+	if (hdr[0] != UCODE_MAGIC ||
+	    hdr[1] != UCODE_EQUIV_CPU_TABLE_TYPE ||
+	    hdr[2] == 0)
+		return CONTAINER_HDR_SZ;
 
+	buf = ucode;
 
-	/* find equiv cpu table */
-	if (header[0] != UCODE_MAGIC ||
-	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
-	    header[2] == 0)                            /* size */
-		return eq_id;
+	eq = (struct equiv_cpu_entry *)(buf + CONTAINER_HDR_SZ);
 
-	eax = 0x00000001;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
+	/* Find the equivalence ID of our CPU in this table: */
+	eq_id = find_equiv_id(eq, desc->cpuid_1_eax);
 
-	while (left > 0) {
-		eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
+	buf  += hdr[2] + CONTAINER_HDR_SZ;
+	size -= hdr[2] + CONTAINER_HDR_SZ;
+
+	/*
+	 * Scan through the rest of the container to find where it ends. We do
+	 * some basic sanity-checking too.
+	 */
+	while (size > 0) {
+		struct microcode_amd *mc;
+		u32 patch_size;
 
-		ret.data = data;
+		hdr = (u32 *)buf;
 
-		/* Advance past the container header */
-		offset = header[2] + CONTAINER_HDR_SZ;
-		data  += offset;
-		left  -= offset;
+		if (hdr[0] != UCODE_UCODE_TYPE)
+			break;
 
-		eq_id = find_equiv_id(eq, eax);
-		if (eq_id) {
-			ret.size = compute_container_size(ret.data, left + offset);
+		/* Sanity-check patch size. */
+		patch_size = hdr[1];
+		if (patch_size > PATCH_MAX_SIZE)
+			break;
 
-			/*
-			 * truncate how much we need to iterate over in the
-			 * ucode update loop below
-			 */
-			left = ret.size - offset;
+		/* Skip patch section header: */
+		buf  += SECTION_HDR_SIZE;
+		size -= SECTION_HDR_SIZE;
 
-			*ret_cont = ret;
-			return eq_id;
+		mc = (struct microcode_amd *)buf;
+		if (eq_id == mc->hdr.processor_rev_id) {
+			desc->psize = patch_size;
+			desc->mc = mc;
 		}
 
-		/*
-		 * support multiple container files appended together. if this
-		 * one does not have a matching equivalent cpu entry, we fast
-		 * forward to the next container file.
-		 */
-		while (left > 0) {
-			header = (u32 *)data;
-
-			if (header[0] == UCODE_MAGIC &&
-			    header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
-				break;
-
-			offset = header[1] + SECTION_HDR_SIZE;
-			data  += offset;
-			left  -= offset;
-		}
+		buf  += patch_size;
+		size -= patch_size;
+	}
 
-		/* mark where the next microcode container file starts */
-		offset    = data - (u8 *)ucode;
-		ucode     = data;
+	/*
+	 * If we have found a patch (desc->mc), it means we're looking at the
+	 * container which has a patch for this CPU so return 0 to mean, @ucode
+	 * already points to the proper container. Otherwise, we return the size
+	 * we scanned so that we can advance to the next container in the
+	 * buffer.
+	 */
+	if (desc->mc) {
+		desc->data = ucode;
+		desc->size = orig_size - size;
+
+		return 0;
 	}
 
-	return eq_id;
+	return orig_size - size;
 }
 
-static int __apply_microcode_amd(struct microcode_amd *mc_amd)
+/*
+ * Scan the ucode blob for the proper container as we can have multiple
+ * containers glued together.
+ */
+static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc)
+{
+	ssize_t rem = size;
+
+	while (rem >= 0) {
+		ssize_t s = parse_container(ucode, rem, desc);
+		if (!s)
+			return;
+
+		ucode += s;
+		rem   -= s;
+	}
+}
+
+static int __apply_microcode_amd(struct microcode_amd *mc)
 {
 	u32 rev, dummy;
 
-	native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
+	native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc->hdr.data_code);
 
 	/* verify patch application was successful */
 	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-	if (rev != mc_amd->hdr.patch_id)
+	if (rev != mc->hdr.patch_id)
 		return -1;
 
 	return 0;
@@ -217,17 +194,16 @@ static int __apply_microcode_amd(struct microcode_amd *mc_amd)
  * load_microcode_amd() to save equivalent cpu table and microcode patches in
  * kernel heap memory.
  *
- * Returns true if container found (sets @ret_cont), false otherwise.
+ * Returns true if container found (sets @desc), false otherwise.
  */
-static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch,
-				      struct container *ret_cont)
+static bool
+apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_patch)
 {
+	struct cont_desc desc = { 0 };
 	u8 (*patch)[PATCH_MAX_SIZE];
-	u32 rev, *header, *new_rev;
-	struct container ret;
-	int offset, left;
-	u16 eq_id = 0;
-	u8  *data;
+	struct microcode_amd *mc;
+	u32 rev, dummy, *new_rev;
+	bool ret = false;
 
 #ifdef CONFIG_X86_32
 	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
@@ -237,50 +213,27 @@ static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch,
 	patch	= &amd_ucode_patch;
 #endif
 
-	if (check_current_patch_level(&rev, true))
-		return false;
-
-	eq_id = find_proper_container(ucode, size, &ret);
-	if (!eq_id)
-		return false;
-
-	this_equiv_id = eq_id;
-	header = (u32 *)ret.data;
-
-	/* We're pointing to an equiv table, skip over it. */
-	data = ret.data +  header[2] + CONTAINER_HDR_SZ;
-	left = ret.size - (header[2] + CONTAINER_HDR_SZ);
-
-	while (left > 0) {
-		struct microcode_amd *mc;
-
-		header = (u32 *)data;
-		if (header[0] != UCODE_UCODE_TYPE || /* type */
-		    header[1] == 0)                  /* size */
-			break;
+	desc.cpuid_1_eax = cpuid_1_eax;
 
-		mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
+	scan_containers(ucode, size, &desc);
 
-		if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) {
+	mc = desc.mc;
+	if (!mc)
+		return ret;
 
-			if (!__apply_microcode_amd(mc)) {
-				rev = mc->hdr.patch_id;
-				*new_rev = rev;
+	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+	if (rev >= mc->hdr.patch_id)
+		return ret;
 
-				if (save_patch)
-					memcpy(patch, mc, min_t(u32, header[1], PATCH_MAX_SIZE));
-			}
-		}
+	if (!__apply_microcode_amd(mc)) {
+		*new_rev = mc->hdr.patch_id;
+		ret      = true;
 
-		offset  = header[1] + SECTION_HDR_SIZE;
-		data   += offset;
-		left   -= offset;
+		if (save_patch)
+			memcpy(patch, mc, min_t(u32, desc.psize, PATCH_MAX_SIZE));
 	}
 
-	if (ret_cont)
-		*ret_cont = ret;
-
-	return true;
+	return ret;
 }
 
 static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
@@ -298,10 +251,9 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
 #endif
 }
 
-void __init load_ucode_amd_bsp(unsigned int family)
+void __load_ucode_amd(unsigned int cpuid_1_eax, struct cpio_data *ret)
 {
 	struct ucode_cpu_info *uci;
-	u32 eax, ebx, ecx, edx;
 	struct cpio_data cp;
 	const char *path;
 	bool use_pa;
@@ -316,184 +268,95 @@ void __init load_ucode_amd_bsp(unsigned int family)
 		use_pa	= false;
 	}
 
-	if (!get_builtin_microcode(&cp, family))
+	if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax)))
 		cp = find_microcode_in_initrd(path, use_pa);
 
-	if (!(cp.data && cp.size))
-		return;
-
-	/* Get BSP's CPUID.EAX(1), needed in load_microcode_amd() */
-	eax = 1;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-	uci->cpu_sig.sig = eax;
+	/* Needed in load_microcode_amd() */
+	uci->cpu_sig.sig = cpuid_1_eax;
 
-	apply_microcode_early_amd(cp.data, cp.size, true, NULL);
+	*ret = cp;
 }
 
-#ifdef CONFIG_X86_32
-/*
- * On 32-bit, since AP's early load occurs before paging is turned on, we
- * cannot traverse cpu_equiv_table and microcode_cache in kernel heap memory.
- * So during cold boot, AP will apply_ucode_in_initrd() just like the BSP.
- * In save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch,
- * which is used upon resume from suspend.
- */
-void load_ucode_amd_ap(unsigned int family)
+void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
 {
-	struct microcode_amd *mc;
-	struct cpio_data cp;
-
-	mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
-	if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
-		__apply_microcode_amd(mc);
-		return;
-	}
-
-	if (!get_builtin_microcode(&cp, family))
-		cp = find_microcode_in_initrd((const char *)__pa_nodebug(ucode_path), true);
+	struct cpio_data cp = { };
 
+	__load_ucode_amd(cpuid_1_eax, &cp);
 	if (!(cp.data && cp.size))
 		return;
 
-	/*
-	 * This would set amd_ucode_patch above so that the following APs can
-	 * use it directly instead of going down this path again.
-	 */
-	apply_microcode_early_amd(cp.data, cp.size, true, NULL);
+	apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true);
 }
-#else
-void load_ucode_amd_ap(unsigned int family)
+
+void load_ucode_amd_ap(unsigned int cpuid_1_eax)
 {
-	struct equiv_cpu_entry *eq;
 	struct microcode_amd *mc;
-	u32 rev, eax;
-	u16 eq_id;
-
-	/* 64-bit runs with paging enabled, thus early==false. */
-	if (check_current_patch_level(&rev, false))
-		return;
-
-	/* First AP hasn't cached it yet, go through the blob. */
-	if (!cont.data) {
-		struct cpio_data cp = { NULL, 0, "" };
+	struct cpio_data cp;
+	u32 *new_rev, rev, dummy;
 
-		if (cont.size == -1)
-			return;
+	if (IS_ENABLED(CONFIG_X86_32)) {
+		mc	= (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
+		new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
+	} else {
+		mc	= (struct microcode_amd *)amd_ucode_patch;
+		new_rev = &ucode_new_rev;
+	}
 
-reget:
-		if (!get_builtin_microcode(&cp, family)) {
-#ifdef CONFIG_BLK_DEV_INITRD
-			if (!initrd_gone)
-				cp = find_cpio_data(ucode_path, (void *)initrd_start,
-						    initrd_end - initrd_start, NULL);
-#endif
-			if (!(cp.data && cp.size)) {
-				/*
-				 * Mark it so that other APs do not scan again
-				 * for no real reason and slow down boot
-				 * needlessly.
-				 */
-				cont.size = -1;
-				return;
-			}
-		}
+	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
-		if (!apply_microcode_early_amd(cp.data, cp.size, false, &cont)) {
-			cont.size = -1;
+	/* Check whether we have saved a new patch already: */
+	if (*new_rev && rev < mc->hdr.patch_id) {
+		if (!__apply_microcode_amd(mc)) {
+			*new_rev = mc->hdr.patch_id;
 			return;
 		}
 	}
 
-	eax = cpuid_eax(0x00000001);
-	eq  = (struct equiv_cpu_entry *)(cont.data + CONTAINER_HDR_SZ);
-
-	eq_id = find_equiv_id(eq, eax);
-	if (!eq_id)
+	__load_ucode_amd(cpuid_1_eax, &cp);
+	if (!(cp.data && cp.size))
 		return;
 
-	if (eq_id == this_equiv_id) {
-		mc = (struct microcode_amd *)amd_ucode_patch;
-
-		if (mc && rev < mc->hdr.patch_id) {
-			if (!__apply_microcode_amd(mc))
-				ucode_new_rev = mc->hdr.patch_id;
-		}
-
-	} else {
-
-		/*
-		 * AP has a different equivalence ID than BSP, looks like
-		 * mixed-steppings silicon so go through the ucode blob anew.
-		 */
-		goto reget;
-	}
+	apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false);
 }
-#endif /* CONFIG_X86_32 */
 
 static enum ucode_state
 load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
 
-int __init save_microcode_in_initrd_amd(unsigned int fam)
+int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
 {
+	struct cont_desc desc = { 0 };
 	enum ucode_state ret;
-	int retval = 0;
-	u16 eq_id;
-
-	if (!cont.data) {
-		if (IS_ENABLED(CONFIG_X86_32) && (cont.size != -1)) {
-			struct cpio_data cp = { NULL, 0, "" };
-
-#ifdef CONFIG_BLK_DEV_INITRD
-			cp = find_cpio_data(ucode_path, (void *)initrd_start,
-					    initrd_end - initrd_start, NULL);
-#endif
+	struct cpio_data cp;
 
-			if (!(cp.data && cp.size)) {
-				cont.size = -1;
-				return -EINVAL;
-			}
+	cp = find_microcode_in_initrd(ucode_path, false);
+	if (!(cp.data && cp.size))
+		return -EINVAL;
 
-			eq_id = find_proper_container(cp.data, cp.size, &cont);
-			if (!eq_id) {
-				cont.size = -1;
-				return -EINVAL;
-			}
+	desc.cpuid_1_eax = cpuid_1_eax;
 
-		} else
-			return -EINVAL;
-	}
+	scan_containers(cp.data, cp.size, &desc);
+	if (!desc.mc)
+		return -EINVAL;
 
-	ret = load_microcode_amd(smp_processor_id(), fam, cont.data, cont.size);
+	ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax),
+				 desc.data, desc.size);
 	if (ret != UCODE_OK)
-		retval = -EINVAL;
-
-	/*
-	 * This will be freed any msec now, stash patches for the current
-	 * family and switch to patch cache for cpu hotplug, etc later.
-	 */
-	cont.data = NULL;
-	cont.size = 0;
+		return -EINVAL;
 
-	return retval;
+	return 0;
 }
 
 void reload_ucode_amd(void)
 {
 	struct microcode_amd *mc;
-	u32 rev;
-
-	/*
-	 * early==false because this is a syscore ->resume path and by
-	 * that time paging is long enabled.
-	 */
-	if (check_current_patch_level(&rev, false))
-		return;
+	u32 rev, dummy;
 
 	mc = (struct microcode_amd *)amd_ucode_patch;
 	if (!mc)
 		return;
 
+	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+
 	if (rev < mc->hdr.patch_id) {
 		if (!__apply_microcode_amd(mc)) {
 			ucode_new_rev = mc->hdr.patch_id;
@@ -631,60 +494,13 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 	return patch_size;
 }
 
-/*
- * Those patch levels cannot be updated to newer ones and thus should be final.
- */
-static u32 final_levels[] = {
-	0x01000098,
-	0x0100009f,
-	0x010000af,
-	0, /* T-101 terminator */
-};
-
-/*
- * Check the current patch level on this CPU.
- *
- * @rev: Use it to return the patch level. It is set to 0 in the case of
- * error.
- *
- * Returns:
- *  - true: if update should stop
- *  - false: otherwise
- */
-bool check_current_patch_level(u32 *rev, bool early)
-{
-	u32 lvl, dummy, i;
-	bool ret = false;
-	u32 *levels;
-
-	native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
-
-	if (IS_ENABLED(CONFIG_X86_32) && early)
-		levels = (u32 *)__pa_nodebug(&final_levels);
-	else
-		levels = final_levels;
-
-	for (i = 0; levels[i]; i++) {
-		if (lvl == levels[i]) {
-			lvl = 0;
-			ret = true;
-			break;
-		}
-	}
-
-	if (rev)
-		*rev = lvl;
-
-	return ret;
-}
-
 static int apply_microcode_amd(int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct microcode_amd *mc_amd;
 	struct ucode_cpu_info *uci;
 	struct ucode_patch *p;
-	u32 rev;
+	u32 rev, dummy;
 
 	BUG_ON(raw_smp_processor_id() != cpu);
 
@@ -697,8 +513,7 @@ static int apply_microcode_amd(int cpu)
 	mc_amd  = p->data;
 	uci->mc = p->data;
 
-	if (check_current_patch_level(&rev, false))
-		return -1;
+	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
 	/* need to apply patch? */
 	if (rev >= mc_amd->hdr.patch_id) {
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 73102d932760..b4a4cd39b358 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -66,19 +66,50 @@ static DEFINE_MUTEX(microcode_mutex);
 
 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
 
-/*
- * Operations that are run on a target cpu:
- */
-
 struct cpu_info_ctx {
 	struct cpu_signature	*cpu_sig;
 	int			err;
 };
 
+/*
+ * Those patch levels cannot be updated to newer ones and thus should be final.
+ */
+static u32 final_levels[] = {
+	0x01000098,
+	0x0100009f,
+	0x010000af,
+	0, /* T-101 terminator */
+};
+
+/*
+ * Check the current patch level on this CPU.
+ *
+ * Returns:
+ *  - true: if update should stop
+ *  - false: otherwise
+ */
+static bool amd_check_current_patch_level(void)
+{
+	u32 lvl, dummy, i;
+	u32 *levels;
+
+	native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
+
+	if (IS_ENABLED(CONFIG_X86_32))
+		levels = (u32 *)__pa_nodebug(&final_levels);
+	else
+		levels = final_levels;
+
+	for (i = 0; levels[i]; i++) {
+		if (lvl == levels[i])
+			return true;
+	}
+	return false;
+}
+
 static bool __init check_loader_disabled_bsp(void)
 {
 	static const char *__dis_opt_str = "dis_ucode_ldr";
-	u32 a, b, c, d;
 
 #ifdef CONFIG_X86_32
 	const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
@@ -94,18 +125,19 @@ static bool __init check_loader_disabled_bsp(void)
 	if (!have_cpuid_p())
 		return *res;
 
-	a = 1;
-	c = 0;
-	native_cpuid(&a, &b, &c, &d);
-
 	/*
 	 * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
 	 * completely accurate as xen pv guests don't see that CPUID bit set but
 	 * that's good enough as they don't land on the BSP path anyway.
 	 */
-	if (c & BIT(31))
+	if (native_cpuid_ecx(1) & BIT(31))
 		return *res;
 
+	if (x86_cpuid_vendor() == X86_VENDOR_AMD) {
+		if (amd_check_current_patch_level())
+			return *res;
+	}
+
 	if (cmdline_find_option_bool(cmdline, option) <= 0)
 		*res = false;
 
@@ -133,23 +165,21 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name)
 
 void __init load_ucode_bsp(void)
 {
-	int vendor;
-	unsigned int family;
+	unsigned int cpuid_1_eax;
 
 	if (check_loader_disabled_bsp())
 		return;
 
-	vendor = x86_cpuid_vendor();
-	family = x86_cpuid_family();
+	cpuid_1_eax = native_cpuid_eax(1);
 
-	switch (vendor) {
+	switch (x86_cpuid_vendor()) {
 	case X86_VENDOR_INTEL:
-		if (family >= 6)
+		if (x86_family(cpuid_1_eax) >= 6)
 			load_ucode_intel_bsp();
 		break;
 	case X86_VENDOR_AMD:
-		if (family >= 0x10)
-			load_ucode_amd_bsp(family);
+		if (x86_family(cpuid_1_eax) >= 0x10)
+			load_ucode_amd_bsp(cpuid_1_eax);
 		break;
 	default:
 		break;
@@ -167,22 +197,21 @@ static bool check_loader_disabled_ap(void)
 
 void load_ucode_ap(void)
 {
-	int vendor, family;
+	unsigned int cpuid_1_eax;
 
 	if (check_loader_disabled_ap())
 		return;
 
-	vendor = x86_cpuid_vendor();
-	family = x86_cpuid_family();
+	cpuid_1_eax = native_cpuid_eax(1);
 
-	switch (vendor) {
+	switch (x86_cpuid_vendor()) {
 	case X86_VENDOR_INTEL:
-		if (family >= 6)
+		if (x86_family(cpuid_1_eax) >= 6)
 			load_ucode_intel_ap();
 		break;
 	case X86_VENDOR_AMD:
-		if (family >= 0x10)
-			load_ucode_amd_ap(family);
+		if (x86_family(cpuid_1_eax) >= 0x10)
+			load_ucode_amd_ap(cpuid_1_eax);
 		break;
 	default:
 		break;
@@ -201,7 +230,7 @@ static int __init save_microcode_in_initrd(void)
 		break;
 	case X86_VENDOR_AMD:
 		if (c->x86 >= 0x10)
-			ret = save_microcode_in_initrd_amd(c->x86);
+			return save_microcode_in_initrd_amd(cpuid_eax(1));
 		break;
 	default:
 		break;

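For readers who want to poke at the container format which
parse_container()/scan_containers() walk: below is a minimal, stand-alone
user-space sketch of the AuthenticAMD.bin layout. The constants mirror the
kernel's (arch/x86/include/asm/microcode_amd.h); the walker and helper names
are hypothetical, made up for this illustration, and the error handling is
deliberately minimal.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Values mirror arch/x86/include/asm/microcode_amd.h: */
#define UCODE_MAGIC			0x00414d44u
#define UCODE_EQUIV_CPU_TABLE_TYPE	0x00000000u
#define UCODE_UCODE_TYPE		0x00000001u
#define CONTAINER_HDR_SZ		12	/* magic, type, table size */
#define SECTION_HDR_SIZE		8	/* section type, section size */

/* Walk one container; returns the bytes consumed, in the same spirit as
 * parse_container() above (hypothetical helper, not kernel code). */
static size_t walk_one_container(const uint8_t *buf, size_t size)
{
	const uint32_t *hdr = (const uint32_t *)buf;
	size_t used;

	/* A container starts with a 12-byte header describing the
	 * equivalence table, followed by the table itself. */
	if (size < CONTAINER_HDR_SZ || hdr[0] != UCODE_MAGIC ||
	    hdr[1] != UCODE_EQUIV_CPU_TABLE_TYPE || hdr[2] == 0)
		return CONTAINER_HDR_SZ;	/* skip junk, as the kernel does */

	used = CONTAINER_HDR_SZ + hdr[2];

	/* Then patch sections, each an 8-byte (type, size) header plus
	 * the patch payload, until the next container or end of blob. */
	while (used + SECTION_HDR_SIZE <= size) {
		const uint32_t *sec = (const uint32_t *)(buf + used);

		if (sec[0] != UCODE_UCODE_TYPE)
			break;			/* next container begins here */

		printf("  patch section: %u bytes\n", sec[1]);
		used += SECTION_HDR_SIZE + sec[1];
	}

	return used;
}

/* Containers can be glued together, so scan like scan_containers(): */
static void walk_blob(const uint8_t *buf, size_t size)
{
	while (size > CONTAINER_HDR_SZ) {
		size_t used = walk_one_container(buf, size);

		if (used >= size)
			break;
		buf  += used;
		size -= used;
	}
}

int main(int argc, char **argv)
{
	uint8_t *buf;
	long len;
	FILE *f;

	if (argc != 2 || !(f = fopen(argv[1], "rb")))
		return 1;

	fseek(f, 0, SEEK_END);
	len = ftell(f);
	rewind(f);

	buf = malloc(len);
	if (buf && fread(buf, 1, len, f) == (size_t)len)
		walk_blob(buf, len);

	fclose(f);
	free(buf);
	return 0;
}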