Message-ID: <8825848a-b4d2-4b0e-beee-1d889b1ee284@huawei.com>
Date: Tue, 14 Jan 2025 11:37:07 +0800
From: mawupeng <mawupeng1@...wei.com>
To: <david@...hat.com>, <akpm@...ux-foundation.org>, <osalvador@...e.de>,
<nao.horiguchi@...il.com>, <linmiaohe@...wei.com>, <mhocko@...e.com>
CC: <mawupeng1@...wei.com>, <linux-mm@...ck.org>,
<linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 1/2] mm: memory_hotplug: add TTU_HWPOISON for poisoned
folio during migrate
On 2025/1/13 21:02, David Hildenbrand wrote:
> On 13.01.25 09:27, Wupeng Ma wrote:
>> From: Ma Wupeng <mawupeng1@...wei.com>
>>
>> Commit 6da6b1d4a7df ("mm/hwpoison: convert TTU_IGNORE_HWPOISON to
>> TTU_HWPOISON") introduced TTU_HWPOISON to replace TTU_IGNORE_HWPOISON
>> in order to stop sending a SIGBUS signal when accessing an error page
>> after a memory error on a clean folio. However, during page migration,
>> the task should be killed so that the migration can succeed.
>>
>> A warning is produced when unmapping a poisoned folio, with the following log:
>>
>> ------------[ cut here ]------------
>> WARNING: CPU: 1 PID: 365 at mm/rmap.c:1847 try_to_unmap_one+0x8fc/0xd3c
>
> Is that the
>
> 	if (unlikely(folio_test_swapbacked(folio) !=
> 		     folio_test_swapcache(folio))) {
> 		WARN_ON_ONCE(1);
> 		goto walk_abort;
> 	}
>
> ?
Yes.
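
For context on why that check fires: without TTU_HWPOISON,
try_to_unmap_one() skips the hwpoison special case and falls through to
the anon/swap path, which expects a swap-backed folio to already be in
the swap cache. A poisoned anon folio arriving there is not, so the
WARN_ON_ONCE() trips. Roughly (simplified sketch of mm/rmap.c; details
vary by kernel version):

	if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
		/* Replace the PTE with a hwpoison swap entry so that a
		 * later access raises SIGBUS. */
		pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
		...
	} else if (folio_test_anon(folio)) {
		/* Normal swap-out path: the folio should already be in
		 * the swap cache; a poisoned folio that skipped the
		 * branch above is not. */
		if (unlikely(folio_test_swapbacked(folio) !=
			     folio_test_swapcache(folio))) {
			WARN_ON_ONCE(1);
			goto walk_abort;
		}
		...
	}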
>
>> Modules linked in:
>> CPU: 1 UID: 0 PID: 365 Comm: bash Tainted: G W 6.13.0-rc1-00018-gacdb4bbda7ab #42
>> Tainted: [W]=WARN
>> Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
>> pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>> pc : try_to_unmap_one+0x8fc/0xd3c
>> lr : try_to_unmap_one+0x3dc/0xd3c
>> Call trace:
>> try_to_unmap_one+0x8fc/0xd3c (P)
>> try_to_unmap_one+0x3dc/0xd3c (L)
>> rmap_walk_anon+0xdc/0x1f8
>> rmap_walk+0x3c/0x58
>> try_to_unmap+0x88/0x90
>> unmap_poisoned_folio+0x30/0xa8
>> do_migrate_range+0x4a0/0x568
>> offline_pages+0x5a4/0x670
>> memory_block_action+0x17c/0x374
>> memory_subsys_offline+0x3c/0x78
>> device_offline+0xa4/0xd0
>> state_store+0x8c/0xf0
>> dev_attr_store+0x18/0x2c
>> sysfs_kf_write+0x44/0x54
>> kernfs_fop_write_iter+0x118/0x1a8
>> vfs_write+0x3a8/0x4bc
>> ksys_write+0x6c/0xf8
>> __arm64_sys_write+0x1c/0x28
>> invoke_syscall+0x44/0x100
>> el0_svc_common.constprop.0+0x40/0xe0
>> do_el0_svc+0x1c/0x28
>> el0_svc+0x30/0xd0
>> el0t_64_sync_handler+0xc8/0xcc
>> el0t_64_sync+0x198/0x19c
>> ---[ end trace 0000000000000000 ]---
>>
>> Add TTU_HWPOISON during unmap_poisoned_folio to fix this problem.
>>
>> Fixes: 6da6b1d4a7df ("mm/hwpoison: convert TTU_IGNORE_HWPOISON to TTU_HWPOISON")
>> Signed-off-by: Ma Wupeng <mawupeng1@...wei.com>
>> ---
>> mm/memory_hotplug.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index c43b4e7fb298..330668d37e44 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -1806,7 +1806,7 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
>>  			if (WARN_ON(folio_test_lru(folio)))
>>  				folio_isolate_lru(folio);
>>  			if (folio_mapped(folio))
>> -				unmap_poisoned_folio(folio, TTU_IGNORE_MLOCK);
>> +				unmap_poisoned_folio(folio, TTU_IGNORE_MLOCK | TTU_HWPOISON);
>
>
> Correct me if I am wrong: if we reach that point, we already failed the
> unmap_poisoned_folio() in hwpoison_user_mappings(), and did a
>
> pr_err("%#lx: failed to unmap page (folio mapcount=%d)\n", ...
>
> there.
We have faced a different race here, as follows:

CPU#0                                CPU#1
offline_pages
  do_migrate_range
                                     memory_failure
                                       TestSetPageHWPoison
                                       // folio is marked poisoned here
    unmap_poisoned_folio
    // should kill the task here
                                     ...
                                     hwpoison_user_mappings
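
That is, memory_failure() publishes the poison flag well before it
reaches the unmap step, so do_migrate_range() on CPU#0 can observe a
poisoned but not-yet-unmapped folio. Very roughly (paraphrased from
mm/memory-failure.c; details vary by kernel version):

	int memory_failure(unsigned long pfn, int flags)
	{
		...
		if (TestSetPageHWPoison(p)) {	/* flag set and visible here */
			pr_err("%#lx: already hardware poisoned\n", pfn);
			...
		}
		...
		/* unmapping and SIGBUS delivery only happen much later: */
		if (!hwpoison_user_mappings(folio, p, pfn, flags))
			...	/* failure handling */
		...
	}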
>
>
> This all looks odd. We must never call unmap_*() on an anon folio
> without setting TTU_HWPOISON if it is poisoned. But for the pagecache
> we just might want to?
>
>
> What about detecting the flags internally and just moving the detection logic into
> unmap_poisoned_folio()?
Your solution does seem better and solves my problem. I'll send another
patch based on your idea.
>
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 109ef30fee11f..eb8266d754b71 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1115,7 +1115,7 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
>   * mm/memory-failure.c
>   */
>  #ifdef CONFIG_MEMORY_FAILURE
> -void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu);
> +int unmap_poisoned_folio(struct folio *folio, bool must_kill);
>  void shake_folio(struct folio *folio);
>  extern int hwpoison_filter(struct page *p);
>
> @@ -1138,7 +1138,8 @@ unsigned long page_mapped_in_vma(const struct page *page,
>  		struct vm_area_struct *vma);
>
>  #else
> -static inline void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu)
> +static inline int unmap_poisoned_folio(struct folio *folio, bool must_kill)
>  {
> +	return 0;
>  }
>  #endif
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index a7b8ccd29b6f5..2054b63e9b79c 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1556,8 +1556,29 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
>  	return ret;
>  }
>
> -void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu)
> +int unmap_poisoned_folio(struct folio *folio, bool must_kill)
>  {
> +	enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
> +	struct address_space *mapping;
> +
> +	/*
> +	 * Propagate the dirty bit from PTEs to struct page first, because we
> +	 * need this to decide if we should kill or just drop the page.
> +	 * XXX: the dirty test could be racy: set_page_dirty() may not always
> +	 * be called inside page lock (it's recommended but not enforced).
> +	 */
> +	mapping = folio_mapping(folio);
> +	if (!must_kill && !folio_test_dirty(folio) && mapping &&
> +	    mapping_can_writeback(mapping)) {
> +		if (folio_mkclean(folio)) {
> +			folio_set_dirty(folio);
> +		} else {
> +			ttu &= ~TTU_HWPOISON;
> +			pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
> +				folio_pfn(folio));
> +		}
> +	}
> +
>  	if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
>  		struct address_space *mapping;
>
> @@ -1580,6 +1601,8 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu)
>  	} else {
>  		try_to_unmap(folio, ttu);
>  	}
> +
> +	return folio_mapped(folio) ? -EBUSY : 0;
>  }
>
>  /*
> @@ -1589,8 +1612,6 @@ void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu)
>  static bool hwpoison_user_mappings(struct folio *folio, struct page *p,
>  		unsigned long pfn, int flags)
>  {
> -	enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
> -	struct address_space *mapping;
>  	LIST_HEAD(tokill);
>  	bool unmap_success;
>  	int forcekill;
> @@ -1618,24 +1639,6 @@ static bool hwpoison_user_mappings(struct folio *folio, struct page *p,
>  		ttu &= ~TTU_HWPOISON;
>  	}
>
> -	/*
> -	 * Propagate the dirty bit from PTEs to struct page first, because we
> -	 * need this to decide if we should kill or just drop the page.
> -	 * XXX: the dirty test could be racy: set_page_dirty() may not always
> -	 * be called inside page lock (it's recommended but not enforced).
> -	 */
> -	mapping = folio_mapping(folio);
> -	if (!(flags & MF_MUST_KILL) && !folio_test_dirty(folio) && mapping &&
> -	    mapping_can_writeback(mapping)) {
> -		if (folio_mkclean(folio)) {
> -			folio_set_dirty(folio);
> -		} else {
> -			ttu &= ~TTU_HWPOISON;
> -			pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
> -				pfn);
> -		}
> -	}
> -
>  	/*
>  	 * First collect all the processes that have the page
>  	 * mapped in dirty form. This has to be done before try_to_unmap,
> @@ -1643,9 +1646,7 @@ static bool hwpoison_user_mappings(struct folio *folio, struct page *p,
>  	 */
>  	collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED);
>
> -	unmap_poisoned_folio(folio, ttu);
> -
> -	unmap_success = !folio_mapped(folio);
> +	unmap_success = !unmap_poisoned_folio(folio, flags & MF_MUST_KILL);
>  	if (!unmap_success)
>  		pr_err("%#lx: failed to unmap page (folio mapcount=%d)\n",
>  			pfn, folio_mapcount(folio));
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index e3655f07dd6e3..c549e68102251 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1833,7 +1833,7 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
>  			if (WARN_ON(folio_test_lru(folio)))
>  				folio_isolate_lru(folio);
>  			if (folio_mapped(folio))
> -				unmap_poisoned_folio(folio, TTU_IGNORE_MLOCK);
> +				unmap_poisoned_folio(folio, false);
>  			continue;
>  		}
>
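
One more thought on the sketch: since unmap_poisoned_folio() now
reports whether the folio is still mapped, the hotplug caller above
could also act on the result instead of silently continuing. A
hypothetical variant (the pr_warn() and its wording are mine, not part
of the posted diff):

	if (folio_mapped(folio) &&
	    unmap_poisoned_folio(folio, false))
		pr_warn("%#lx: failed to unmap poisoned folio\n",
			folio_pfn(folio));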