lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20221205160043.57465-4-xiexiuqi@huawei.com>
Date:   Tue, 6 Dec 2022 00:00:42 +0800
From:   Xie XiuQi <xiexiuqi@...wei.com>
To:     <catalin.marinas@....com>, <will@...nel.org>,
        <james.morse@....com>, <rafael@...nel.org>, <tony.luck@...el.com>,
        <robert.moore@...el.com>, <bp@...en8.de>, <devel@...ica.org>,
        <linux-arm-kernel@...ts.infradead.org>,
        <linux-acpi@...r.kernel.org>, <linux-kernel@...r.kernel.org>
CC:     <tanxiaofei@...wei.com>, <wangxiongfeng2@...wei.com>,
        <lvying6@...wei.com>, <naoya.horiguchi@....com>,
        <wangkefeng.wang@...wei.com>
Subject: [PATCH v3 3/4] arm64: ghes: handle the case when memory_failure recovery failed

memory_failure() may not always recover successfully. In the synchronous
external data abort case, if memory_failure() fails to recover, we must handle it.

In this case, if the recovery fails, the common helper function
arch_apei_do_recovery_failed() is invoked. On the arm64 platform, we just
send a SIGBUS.

Signed-off-by: Xie XiuQi <xiexiuqi@...wei.com>
---
 drivers/acpi/apei/ghes.c |  3 ++-
 include/linux/mm.h       |  2 +-
 mm/memory-failure.c      | 24 +++++++++++++++++-------
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ba0631c54c52..ddc4da603215 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -435,7 +435,8 @@ static void ghes_kick_task_work(struct callback_head *head)
 
 	estatus_node = container_of(head, struct ghes_estatus_node, task_work);
 	if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
-		memory_failure_queue_kick(estatus_node->task_work_cpu);
+		if (memory_failure_queue_kick(estatus_node->task_work_cpu))
+			arch_apei_do_recovery_failed();
 
 	estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
 	node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus));
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 974ccca609d2..126d1395c208 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3290,7 +3290,7 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
 		      unsigned long count, int mf_flags);
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue(unsigned long pfn, int flags);
-extern void memory_failure_queue_kick(int cpu);
+extern int memory_failure_queue_kick(int cpu);
 extern int unpoison_memory(unsigned long pfn);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index bead6bccc7f2..b9398f67264a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2240,12 +2240,12 @@ void memory_failure_queue(unsigned long pfn, int flags)
 }
 EXPORT_SYMBOL_GPL(memory_failure_queue);
 
-static void memory_failure_work_func(struct work_struct *work)
+static int __memory_failure_work_func(struct work_struct *work)
 {
 	struct memory_failure_cpu *mf_cpu;
 	struct memory_failure_entry entry = { 0, };
 	unsigned long proc_flags;
-	int gotten;
+	int gotten, ret = 0, result;
 
 	mf_cpu = container_of(work, struct memory_failure_cpu, work);
 	for (;;) {
@@ -2254,24 +2254,34 @@ static void memory_failure_work_func(struct work_struct *work)
 		spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
 		if (!gotten)
 			break;
-		if (entry.flags & MF_SOFT_OFFLINE)
+		if (entry.flags & MF_SOFT_OFFLINE) {
 			soft_offline_page(entry.pfn, entry.flags);
-		else
-			memory_failure(entry.pfn, entry.flags);
+		} else {
+			result = memory_failure(entry.pfn, entry.flags);
+			if (ret == 0 && result != 0)
+				ret = result;
+		}
 	}
+
+	return ret;
+}
+
+static void memory_failure_work_func(struct work_struct *work)
+{
+	__memory_failure_work_func(work);
 }
 
 /*
  * Process memory_failure work queued on the specified CPU.
  * Used to avoid return-to-userspace racing with the memory_failure workqueue.
  */
-void memory_failure_queue_kick(int cpu)
+int memory_failure_queue_kick(int cpu)
 {
 	struct memory_failure_cpu *mf_cpu;
 
 	mf_cpu = &per_cpu(memory_failure_cpu, cpu);
 	cancel_work_sync(&mf_cpu->work);
-	memory_failure_work_func(&mf_cpu->work);
+	return __memory_failure_work_func(&mf_cpu->work);
 }
 
 static int __init memory_failure_init(void)
-- 
2.20.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ