lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20161122111133.mjzpvzhf7o7yl2oa@pd.tnic>
Date:   Tue, 22 Nov 2016 12:11:33 +0100
From:   Borislav Petkov <bp@...en8.de>
To:     Ingo Molnar <mingo@...nel.org>
Cc:     hpa@...or.com, aravindksg.lkml@...il.com, tglx@...utronix.de,
        Yazen.Ghannam@....com, linux-kernel@...r.kernel.org,
        tony.luck@...el.com, linux-edac@...r.kernel.org,
        linux-tip-commits@...r.kernel.org
Subject: Re: [tip:ras/core] x86/mce/AMD: Add system physical address
 translation for AMD Fam17h

On Mon, Nov 21, 2016 at 01:03:54PM +0100, Ingo Molnar wrote:
> So this commit is still sick with the attached config:
> 
>   arch/x86/built-in.o: In function `umc_normaddr_to_sysaddr':
>   (.text+0x65238): undefined reference to `amd_df_indirect_read'
>   ...
>   Makefile:962: recipe for target 'vmlinux' failed
>   make: *** [vmlinux] Error 1
> 
> I've zapped it for now, please resubmit a working version.

Ok, here's a hopefully fixed one, it passes *config smoke test builds
here.

---
From: Yazen Ghannam <Yazen.Ghannam@....com>
Date: Thu, 17 Nov 2016 17:57:27 -0500
Subject: [PATCH] x86/mce/AMD: Add system physical address translation for AMD
 Fam17h

The Unified Memory Controllers (UMCs) on Fam17h log a normalized address
in their MCA_ADDR registers. We need to convert that normalized address
to a system phyisical address in order to support a few facilities:

1) To offline poisoned pages in DRAM proactively in the deferred error
handler.

2) To print sysaddr and page info for DRAM ECC errors in EDAC.

[ Boris: fixes/cleanups ontop:

* hi_addr_offset = 0 - no need for that branch. Stick it all under the
HiAddrOffsetEn case. It confines hi_addr_offset's declaration too.

* Move variables to the innermost scope they're used at so that we save
on stack and not blow it up immediately on function entry.

* do not modify *sys_addr prematurely - we want to not exit early and
have modified *sys_addr some, which callers get to see. We either
convert to a sys_addr or we don't do anything. And we signal that with
the retval of the function.

* rename label out -> out_err - because it is the error path.

* no need to pr_err of the conversion failed case: imagine a
sparsely-populated machine with UMCs which don't have DIMMs. Callers
should look at the retval instead and issue a printk only when really
necessary. No need for useless info in dmesg.

* s/temp_reg/tmp/ and other variable names shortening => shorter code.

* use BIT() everywhere

* make error messages more informative

* ... and more minor cleanups.

]

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@....com>
Cc: Tony Luck <tony.luck@...el.com>
Cc: Aravind Gopalakrishnan <aravindksg.lkml@...il.com>
Cc: linux-edac <linux-edac@...r.kernel.org>
Link: http://lkml.kernel.org/r/1479423463-8536-2-git-send-email-Yazen.Ghannam@amd.com
Signed-off-by: Borislav Petkov <bp@...e.de>
Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
[ Small build fix for the !CONFIG_X86_MCE_AMD case. ]
Signed-off-by: Ingo Molnar <mingo@...nel.org>
[ Add dependency on AMD_NB. ]
Signed-off-by: Borislav Petkov <bp@...e.de>
---
 arch/x86/Kconfig                     |   2 +-
 arch/x86/include/asm/mce.h           |   2 +
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 200 +++++++++++++++++++++++++++++++++++
 3 files changed, 203 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bada636d1065..5c81f3967a5d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1025,7 +1025,7 @@ config X86_MCE_INTEL
 config X86_MCE_AMD
 	def_bool y
 	prompt "AMD MCE features"
-	depends on X86_MCE && X86_LOCAL_APIC
+	depends on X86_MCE && X86_LOCAL_APIC && AMD_NB
 	---help---
 	   Additional support for AMD specific MCE features such as
 	   the DRAM Error Threshold.
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ecda6a93179f..caab413c91ad 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -252,8 +252,10 @@ static inline void cmci_recheck(void) {}
 
 #ifdef CONFIG_X86_MCE_AMD
 void mce_amd_feature_init(struct cpuinfo_x86 *c);
+int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
 #else
 static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
+static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
 #endif
 
 int mce_available(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5b33702ca86e..1a0b0a0e5e2b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -553,6 +553,206 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 		deferred_error_interrupt_enable(c);
 }
 
+int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
+{
+	u64 dram_base_addr, dram_limit_addr, dram_hole_base;
+	/* We start from the normalized address */
+	u64 ret_addr = norm_addr;
+
+	u32 tmp;
+
+	u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask;
+	u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets;
+	u8 intlv_addr_sel, intlv_addr_bit;
+	u8 num_intlv_bits, hashed_bit;
+	u8 lgcy_mmio_hole_en, base = 0;
+	u8 cs_mask, cs_id = 0;
+	bool hash_enabled = false;
+
+	/* Read D18F0x1B4 (DramOffset), check if base 1 is used. */
+	if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp))
+		goto out_err;
+
+	/* Remove HiAddrOffset from normalized address, if enabled: */
+	if (tmp & BIT(0)) {
+		u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8;
+
+		if (norm_addr >= hi_addr_offset) {
+			ret_addr -= hi_addr_offset;
+			base = 1;
+		}
+	}
+
+	/* Read D18F0x110 (DramBaseAddress). */
+	if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp))
+		goto out_err;
+
+	/* Check if address range is valid. */
+	if (!(tmp & BIT(0))) {
+		pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n",
+			__func__, tmp);
+		goto out_err;
+	}
+
+	lgcy_mmio_hole_en = tmp & BIT(1);
+	intlv_num_chan	  = (tmp >> 4) & 0xF;
+	intlv_addr_sel	  = (tmp >> 8) & 0x7;
+	dram_base_addr	  = (tmp & GENMASK_ULL(31, 12)) << 16;
+
+	/* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
+	if (intlv_addr_sel > 3) {
+		pr_err("%s: Invalid interleave address select %d.\n",
+			__func__, intlv_addr_sel);
+		goto out_err;
+	}
+
+	/* Read D18F0x114 (DramLimitAddress). */
+	if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp))
+		goto out_err;
+
+	intlv_num_sockets = (tmp >> 8) & 0x1;
+	intlv_num_dies	  = (tmp >> 10) & 0x3;
+	dram_limit_addr	  = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
+
+	intlv_addr_bit = intlv_addr_sel + 8;
+
+	/* Re-use intlv_num_chan by setting it equal to log2(#channels) */
+	switch (intlv_num_chan) {
+	case 0:	intlv_num_chan = 0; break;
+	case 1: intlv_num_chan = 1; break;
+	case 3: intlv_num_chan = 2; break;
+	case 5:	intlv_num_chan = 3; break;
+	case 7:	intlv_num_chan = 4; break;
+
+	case 8: intlv_num_chan = 1;
+		hash_enabled = true;
+		break;
+	default:
+		pr_err("%s: Invalid number of interleaved channels %d.\n",
+			__func__, intlv_num_chan);
+		goto out_err;
+	}
+
+	num_intlv_bits = intlv_num_chan;
+
+	if (intlv_num_dies > 2) {
+		pr_err("%s: Invalid number of interleaved nodes/dies %d.\n",
+			__func__, intlv_num_dies);
+		goto out_err;
+	}
+
+	num_intlv_bits += intlv_num_dies;
+
+	/* Add a bit if sockets are interleaved. */
+	num_intlv_bits += intlv_num_sockets;
+
+	/* Assert num_intlv_bits <= 4 */
+	if (num_intlv_bits > 4) {
+		pr_err("%s: Invalid interleave bits %d.\n",
+			__func__, num_intlv_bits);
+		goto out_err;
+	}
+
+	if (num_intlv_bits > 0) {
+		u64 temp_addr_x, temp_addr_i, temp_addr_y;
+		u8 die_id_bit, sock_id_bit, cs_fabric_id;
+
+		/*
+		 * Read FabricBlockInstanceInformation3_CS[BlockFabricID].
+		 * This is the fabric id for this coherent slave. Use
+		 * umc/channel# as instance id of the coherent slave
+		 * for FICAA.
+		 */
+		if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp))
+			goto out_err;
+
+		cs_fabric_id = (tmp >> 8) & 0xFF;
+		die_id_bit   = 0;
+
+		/* If interleaved over more than 1 channel: */
+		if (intlv_num_chan) {
+			die_id_bit = intlv_num_chan;
+			cs_mask	   = (1 << die_id_bit) - 1;
+			cs_id	   = cs_fabric_id & cs_mask;
+		}
+
+		sock_id_bit = die_id_bit;
+
+		/* Read D18F1x208 (SystemFabricIdMask). */
+		if (intlv_num_dies || intlv_num_sockets)
+			if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp))
+				goto out_err;
+
+		/* If interleaved over more than 1 die. */
+		if (intlv_num_dies) {
+			sock_id_bit  = die_id_bit + intlv_num_dies;
+			die_id_shift = (tmp >> 24) & 0xF;
+			die_id_mask  = (tmp >> 8) & 0xFF;
+
+			cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit;
+		}
+
+		/* If interleaved over more than 1 socket. */
+		if (intlv_num_sockets) {
+			socket_id_shift	= (tmp >> 28) & 0xF;
+			socket_id_mask	= (tmp >> 16) & 0xFF;
+
+			cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit;
+		}
+
+		/*
+		 * The pre-interleaved address consists of XXXXXXIIIYYYYY
+		 * where III is the ID for this CS, and XXXXXXYYYYY are the
+		 * address bits from the post-interleaved address.
+		 * "num_intlv_bits" has been calculated to tell us how many "I"
+		 * bits there are. "intlv_addr_bit" tells us how many "Y" bits
+		 * there are (where "I" starts).
+		 */
+		temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0);
+		temp_addr_i = (cs_id << intlv_addr_bit);
+		temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;
+		ret_addr    = temp_addr_x | temp_addr_i | temp_addr_y;
+	}
+
+	/* Add dram base address */
+	ret_addr += dram_base_addr;
+
+	/* If legacy MMIO hole enabled */
+	if (lgcy_mmio_hole_en) {
+		if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp))
+			goto out_err;
+
+		dram_hole_base = tmp & GENMASK(31, 24);
+		if (ret_addr >= dram_hole_base)
+			ret_addr += (BIT_ULL(32) - dram_hole_base);
+	}
+
+	if (hash_enabled) {
+		/* Save some parentheses and grab ls-bit at the end. */
+		hashed_bit =	(ret_addr >> 12) ^
+				(ret_addr >> 18) ^
+				(ret_addr >> 21) ^
+				(ret_addr >> 30) ^
+				cs_id;
+
+		hashed_bit &= BIT(0);
+
+		if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0)))
+			ret_addr ^= BIT(intlv_addr_bit);
+	}
+
+	/* Is calculated system address is above DRAM limit address? */
+	if (ret_addr > dram_limit_addr)
+		goto out_err;
+
+	*sys_addr = ret_addr;
+	return 0;
+
+out_err:
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
+
 static void
 __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
 {
-- 
2.10.0

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ