[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <161604048859.1463742.10087657197118774859.stgit@dwillia2-desk3.amr.corp.intel.com>
Date: Wed, 17 Mar 2021 21:08:08 -0700
From: Dan Williams <dan.j.williams@...el.com>
To: linux-mm@...ck.org, linux-nvdimm@...ts.01.org
Cc: Naoya Horiguchi <naoya.horiguchi@....com>,
Andrew Morton <akpm@...ux-foundation.org>,
vishal.l.verma@...el.com, david@...morbit.com, hch@....de,
linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
akpm@...ux-foundation.org
Subject: [PATCH 1/3] mm/memory-failure: Prepare for mass memory_failure()
Currently memory_failure() assumes an infrequent report on a handful of
pages. A new use case for surprise removal of a persistent memory device
needs to trigger memory_failure() on a large range. Rate limit
memory_failure() error logging, and allow the
memory_failure_dev_pagemap() helper to be called directly by moving the
put_dev_pagemap() of the caller-acquired pgmap reference out of the
helper and into memory_failure().
Cc: Naoya Horiguchi <naoya.horiguchi@....com>
Cc: Andrew Morton <akpm@...ux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@...el.com>
---
mm/memory-failure.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 24210c9bd843..43ba4307c526 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -395,8 +395,9 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
* signal and then access the memory. Just kill it.
*/
if (fail || tk->addr == -EFAULT) {
- pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
- pfn, tk->tsk->comm, tk->tsk->pid);
+ pr_err_ratelimited(
+ "Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
+ pfn, tk->tsk->comm, tk->tsk->pid);
do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
tk->tsk, PIDTYPE_PID);
}
@@ -408,8 +409,9 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
* process anyways.
*/
else if (kill_proc(tk, pfn, flags) < 0)
- pr_err("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
- pfn, tk->tsk->comm, tk->tsk->pid);
+ pr_err_ratelimited(
+ "Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
+ pfn, tk->tsk->comm, tk->tsk->pid);
}
put_task_struct(tk->tsk);
kfree(tk);
@@ -919,8 +921,8 @@ static void action_result(unsigned long pfn, enum mf_action_page_type type,
{
trace_memory_failure_event(pfn, type, result);
- pr_err("Memory failure: %#lx: recovery action for %s: %s\n",
- pfn, action_page_types[type], action_name[result]);
+ pr_err_ratelimited("Memory failure: %#lx: recovery action for %s: %s\n",
+ pfn, action_page_types[type], action_name[result]);
}
static int page_action(struct page_state *ps, struct page *p,
@@ -1375,8 +1377,6 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
unlock:
dax_unlock_page(page, cookie);
out:
- /* drop pgmap ref acquired in caller */
- put_dev_pagemap(pgmap);
action_result(pfn, MF_MSG_DAX, rc ? MF_FAILED : MF_RECOVERED);
return rc;
}
@@ -1415,9 +1415,12 @@ int memory_failure(unsigned long pfn, int flags)
if (!p) {
if (pfn_valid(pfn)) {
pgmap = get_dev_pagemap(pfn, NULL);
- if (pgmap)
- return memory_failure_dev_pagemap(pfn, flags,
- pgmap);
+ if (pgmap) {
+ res = memory_failure_dev_pagemap(pfn, flags,
+ pgmap);
+ put_dev_pagemap(pgmap);
+ return res;
+ }
}
pr_err("Memory failure: %#lx: memory outside kernel control\n",
pfn);
Powered by blists - more mailing lists