Date: Mon, 28 Feb 2022 17:41:32 -0500
From: Felix Kuehling <felix.kuehling@....com>
To: Alex Sierra <alex.sierra@....com>, jgg@...dia.com
Cc: david@...hat.com, linux-mm@...ck.org, rcampbell@...dia.com,
    linux-ext4@...r.kernel.org, linux-xfs@...r.kernel.org,
    amd-gfx@...ts.freedesktop.org, dri-devel@...ts.freedesktop.org,
    hch@....de, jglisse@...hat.com, apopple@...dia.com,
    willy@...radead.org, akpm@...ux-foundation.org
Subject: Re: [PATCH] mm: split vm_normal_pages for LRU and non-LRU handling

On 2022-02-28 15:34, Alex Sierra wrote:
> DEVICE_COHERENT pages introduce a subtle distinction in the way
> "normal" pages can be used by various callers throughout the kernel.
> They behave like normal pages for purposes of mapping in CPU page
> tables, and for COW. But they do not support LRU lists, NUMA
> migration or THP.

Should have mentioned KSM here as well, for completeness.

> Therefore we split vm_normal_page into two
> functions vm_normal_any_page and vm_normal_lru_page. The latter will
> only return pages that can be put on an LRU list and that support
> NUMA migration and THP.
>
> We also introduced a FOLL_LRU flag that adds the same behaviour to
> follow_page and related APIs, to allow callers to specify that they
> expect to put pages on an LRU list.
>
> Signed-off-by: Alex Sierra <alex.sierra@....com>

Acked-by: Felix Kuehling <Felix.Kuehling@....com>

FWIW, full disclosure: Alex and I worked on this together, but it's a
bit like the blind leading the blind. ;) It's mostly untested at this
point. Alex is working on adding tests to test_hmm for get_user_pages
of DEVICE_COHERENT pages without FOLL_LONGTERM, and also a test for
COW of DEVICE_COHERENT pages.
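To make the intended calling convention concrete, here is a rough
sketch of how a caller would choose between the two helpers after this
patch (illustrative only, not part of the patch; the helper name
example_walk_one_pte and the needs_lru flag are made up for the
example, and the usual PTE locking is assumed to be held):

        /* Illustrative sketch only; not part of the patch. */
        static void example_walk_one_pte(struct vm_area_struct *vma,
                                         unsigned long addr, pte_t pte,
                                         bool needs_lru)
        {
                struct page *page;

                if (needs_lru)
                        /*
                         * LRU lists, NUMA migration, THP: the LRU variant
                         * returns NULL for device pages.
                         */
                        page = vm_normal_lru_page(vma, addr, pte);
                else
                        /*
                         * Plain mapping/COW paths can take any normal page,
                         * including DEVICE_COHERENT pages.
                         */
                        page = vm_normal_any_page(vma, addr, pte);
                if (!page)
                        return;
                /* ... use page ... */
        }

follow_page() users get the same behaviour by adding FOLL_LRU to their
flags, as the ksm.c, mlock.c, migrate.c and huge_memory.c hunks below
do.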
A few more nit-picks inline.

> ---
>  fs/proc/task_mmu.c  | 12 +++++-----
>  include/linux/mm.h  | 53 ++++++++++++++++++++++++---------------------
>  mm/gup.c            | 10 +++++----
>  mm/hmm.c            |  2 +-
>  mm/huge_memory.c    |  2 +-
>  mm/khugepaged.c     |  8 +++----
>  mm/ksm.c            |  4 ++--
>  mm/madvise.c        |  4 ++--
>  mm/memcontrol.c     |  2 +-
>  mm/memory.c         | 38 ++++++++++++++++++++++----------
>  mm/mempolicy.c      |  4 ++--
>  mm/migrate.c        |  2 +-
>  mm/migrate_device.c |  2 +-
>  mm/mlock.c          |  6 ++---
>  mm/mprotect.c       |  2 +-
>  15 files changed, 85 insertions(+), 66 deletions(-)
>
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 18f8c3acbb85..4274128fbb4c 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -519,7 +519,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
>          struct page *page = NULL;
>
>          if (pte_present(*pte)) {
> -                page = vm_normal_page(vma, addr, *pte);
> +                page = vm_normal_any_page(vma, addr, *pte);
>          } else if (is_swap_pte(*pte)) {
>                  swp_entry_t swpent = pte_to_swp_entry(*pte);
>
> @@ -705,7 +705,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
>          struct page *page = NULL;
>
>          if (pte_present(*pte)) {
> -                page = vm_normal_page(vma, addr, *pte);
> +                page = vm_normal_any_page(vma, addr, *pte);
>          } else if (is_swap_pte(*pte)) {
>                  swp_entry_t swpent = pte_to_swp_entry(*pte);
>
> @@ -1059,7 +1059,7 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
>                  return false;
>          if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
>                  return false;
> -        page = vm_normal_page(vma, addr, pte);
> +        page = vm_normal_any_page(vma, addr, pte);
>          if (!page)
>                  return false;
>          return page_maybe_dma_pinned(page);
> @@ -1172,7 +1172,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
>                          if (!pte_present(ptent))
>                                  continue;
>
> -                        page = vm_normal_page(vma, addr, ptent);
> +                        page = vm_normal_any_page(vma, addr, ptent);
>                          if (!page)
>                                  continue;
>
> @@ -1383,7 +1383,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
>                  if (pm->show_pfn)
>                          frame = pte_pfn(pte);
>                  flags |= PM_PRESENT;
> -                page = vm_normal_page(vma, addr, pte);
> +                page = vm_normal_any_page(vma, addr, pte);
>                  if (pte_soft_dirty(pte))
>                          flags |= PM_SOFT_DIRTY;
>                  if (pte_uffd_wp(pte))
> @@ -1761,7 +1761,7 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
>          if (!pte_present(pte))
>                  return NULL;
>
> -        page = vm_normal_page(vma, addr, pte);
> +        page = vm_normal_lru_page(vma, addr, pte);
>          if (!page)
>                  return NULL;
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index ff9f149ca201..8c9f87151d93 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -593,8 +593,8 @@ struct vm_operations_struct {
>                                          unsigned long addr);
>  #endif
>          /*
> -         * Called by vm_normal_page() for special PTEs to find the
> -         * page for @addr. This is useful if the default behavior
> +         * Called by vm_normal_x_page() for special PTEs to find the

I'd use vm_normal_*_page in these comments to avoid confusion, because
vm_normal_x_page reads like an actual symbol name, but no such symbol
exists.

> +         * page for @addr. This is useful if the default behavior
>           * (using pte_page()) would not find the correct page.
>           */
>          struct page *(*find_special_page)(struct vm_area_struct *vma,
> @@ -1781,7 +1781,9 @@ static inline bool can_do_mlock(void) { return false; }
>  extern int user_shm_lock(size_t, struct ucounts *);
>  extern void user_shm_unlock(size_t, struct ucounts *);
>
> -struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
> +struct page *vm_normal_any_page(struct vm_area_struct *vma, unsigned long addr,
> +                             pte_t pte);
> +struct page *vm_normal_lru_page(struct vm_area_struct *vma, unsigned long addr,
>                               pte_t pte);
>  struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>                                  pmd_t pmd);
> @@ -2880,27 +2882,28 @@ static inline vm_fault_t vmf_error(int err)
>  struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
>                           unsigned int foll_flags);
>
> -#define FOLL_WRITE      0x01    /* check pte is writable */
> -#define FOLL_TOUCH      0x02    /* mark page accessed */
> -#define FOLL_GET        0x04    /* do get_page on page */
> -#define FOLL_DUMP       0x08    /* give error on hole if it would be zero */
> -#define FOLL_FORCE      0x10    /* get_user_pages read/write w/o permission */
> -#define FOLL_NOWAIT     0x20    /* if a disk transfer is needed, start the IO
> -                                 * and return without waiting upon it */
> -#define FOLL_POPULATE   0x40    /* fault in pages (with FOLL_MLOCK) */
> -#define FOLL_NOFAULT    0x80    /* do not fault in pages */
> -#define FOLL_HWPOISON   0x100   /* check page is hwpoisoned */
> -#define FOLL_NUMA       0x200   /* force NUMA hinting page fault */
> -#define FOLL_MIGRATION  0x400   /* wait for page to replace migration entry */
> -#define FOLL_TRIED      0x800   /* a retry, previous pass started an IO */
> -#define FOLL_MLOCK      0x1000  /* lock present pages */
> -#define FOLL_REMOTE     0x2000  /* we are working on non-current tsk/mm */
> -#define FOLL_COW        0x4000  /* internal GUP flag */
> -#define FOLL_ANON       0x8000  /* don't do file mappings */
> -#define FOLL_LONGTERM   0x10000 /* mapping lifetime is indefinite: see below */
> -#define FOLL_SPLIT_PMD  0x20000 /* split huge pmd before returning */
> -#define FOLL_PIN        0x40000 /* pages must be released via unpin_user_page */
> -#define FOLL_FAST_ONLY  0x80000 /* gup_fast: prevent fall-back to slow gup */
> +#define FOLL_WRITE      0x01    /* check pte is writable */
> +#define FOLL_TOUCH      0x02    /* mark page accessed */
> +#define FOLL_GET        0x04    /* do get_page on page */
> +#define FOLL_DUMP       0x08    /* give error on hole if it would be zero */
> +#define FOLL_FORCE      0x10    /* get_user_pages read/write w/o permission */
> +#define FOLL_NOWAIT     0x20    /* if a disk transfer is needed, start the IO
> +                                 * and return without waiting upon it */
> +#define FOLL_POPULATE   0x40    /* fault in pages (with FOLL_MLOCK) */
> +#define FOLL_NOFAULT    0x80    /* do not fault in pages */
> +#define FOLL_HWPOISON   0x100   /* check page is hwpoisoned */
> +#define FOLL_NUMA       0x200   /* force NUMA hinting page fault */
> +#define FOLL_MIGRATION  0x400   /* wait for page to replace migration entry */
> +#define FOLL_TRIED      0x800   /* a retry, previous pass started an IO */
> +#define FOLL_MLOCK      0x1000  /* lock present pages */
> +#define FOLL_REMOTE     0x2000  /* we are working on non-current tsk/mm */
> +#define FOLL_COW        0x4000  /* internal GUP flag */
> +#define FOLL_ANON       0x8000  /* don't do file mappings */
> +#define FOLL_LONGTERM   0x10000 /* mapping lifetime is indefinite: see below */
> +#define FOLL_SPLIT_PMD  0x20000 /* split huge pmd before returning */
> +#define FOLL_PIN        0x40000 /* pages must be released via unpin_user_page */
> +#define FOLL_FAST_ONLY  0x80000 /* gup_fast: prevent fall-back to slow gup */
> +#define FOLL_LRU        0x100000 /* return only LRU (anon or page cache) */
>
>  /*
>   * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
> @@ -3227,7 +3230,7 @@ extern long copy_huge_page_from_user(struct page *dst_page,
>   * @vma: Pointer to the struct vm_area_struct to consider
>   *
>   * Whether transhuge page-table entries are considered "special" following
> - * the definition in vm_normal_page().
> + * the definition in vm_normal_x_page().

vm_normal_*_page

>   *
>   * Return: true if transhuge page-table entries should be considered special,
>   * false otherwise.
> diff --git a/mm/gup.c b/mm/gup.c
> index 41349b685eaf..9e172c906ded 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -539,8 +539,10 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
>                  pte_unmap_unlock(ptep, ptl);
>                  return NULL;
>          }
> -
> -        page = vm_normal_page(vma, address, pte);
> +        if (flags & (FOLL_MLOCK | FOLL_LRU))
> +                page = vm_normal_lru_page(vma, address, pte);
> +        else
> +                page = vm_normal_any_page(vma, address, pte);
>          if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
>                  /*
>                   * Only return device mapping pages in the FOLL_GET or FOLL_PIN
> @@ -824,7 +826,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
>   *
>   * Return: the mapped (struct page *), %NULL if no mapping exists, or
>   * an error pointer if there is a mapping to something not represented
> - * by a page descriptor (see also vm_normal_page()).
> + * by a page descriptor (see also vm_normal_x_page()).

vm_normal_*_page

>   */
>  static struct page *follow_page_mask(struct vm_area_struct *vma,
>                                unsigned long address, unsigned int flags,
> @@ -917,7 +919,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
>                  *vma = get_gate_vma(mm);
>          if (!page)
>                  goto out;
> -        *page = vm_normal_page(*vma, address, *pte);
> +        *page = vm_normal_any_page(*vma, address, *pte);
>          if (!*page) {
>                  if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
>                          goto unmap;
> diff --git a/mm/hmm.c b/mm/hmm.c
> index bd56641c79d4..90c949d66712 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -300,7 +300,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>           * Since each architecture defines a struct page for the zero page, just
>           * fall through and treat it like a normal page.
>           */
> -        if (!vm_normal_page(walk->vma, addr, pte) &&
> +        if (!vm_normal_any_page(walk->vma, addr, pte) &&
>              !pte_devmap(pte) &&
>              !is_zero_pfn(pte_pfn(pte))) {
>                  if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 406a3c28c026..ea1efc825774 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2966,7 +2966,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
>                  }
>
>                  /* FOLL_DUMP to ignore special (like zero) pages */
> -                follflags = FOLL_GET | FOLL_DUMP;
> +                follflags = FOLL_GET | FOLL_DUMP | FOLL_LRU;
>                  page = follow_page(vma, addr, follflags);
>
>                  if (IS_ERR(page))
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 131492fd1148..a7153db09afa 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -627,7 +627,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>                          result = SCAN_PTE_NON_PRESENT;
>                          goto out;
>                  }
> -                page = vm_normal_page(vma, address, pteval);
> +                page = vm_normal_lru_page(vma, address, pteval);
>                  if (unlikely(!page)) {
>                          result = SCAN_PAGE_NULL;
>                          goto out;
> @@ -1286,7 +1286,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>                  if (pte_write(pteval))
>                          writable = true;
>
> -                page = vm_normal_page(vma, _address, pteval);
> +                page = vm_normal_lru_page(vma, _address, pteval);
>                  if (unlikely(!page)) {
>                          result = SCAN_PAGE_NULL;
>                          goto out_unmap;
> @@ -1494,7 +1494,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
>                  if (!pte_present(*pte))
>                          goto abort;
>
> -                page = vm_normal_page(vma, addr, *pte);
> +                page = vm_normal_lru_page(vma, addr, *pte);
>
>                  /*
>                   * Note that uprobe, debugger, or MAP_PRIVATE may change the
> @@ -1512,7 +1512,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
>
>                  if (pte_none(*pte))
>                          continue;
> -                page = vm_normal_page(vma, addr, *pte);
> +                page = vm_normal_lru_page(vma, addr, *pte);
>                  page_remove_rmap(page, false);
>          }
>
> diff --git a/mm/ksm.c b/mm/ksm.c
> index c20bd4d9a0d9..352d37e44694 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -474,7 +474,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
>          do {
>                  cond_resched();
>                  page = follow_page(vma, addr,
> -                                FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
> +                                FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE | FOLL_LRU);
>                  if (IS_ERR_OR_NULL(page))
>                          break;
>                  if (PageKsm(page))
> @@ -559,7 +559,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
>          if (!vma)
>                  goto out;
>
> -        page = follow_page(vma, addr, FOLL_GET);
> +        page = follow_page(vma, addr, FOLL_GET | FOLL_LRU);
>          if (IS_ERR_OR_NULL(page))
>                  goto out;
>          if (PageAnon(page)) {
> diff --git a/mm/madvise.c b/mm/madvise.c
> index 5604064df464..1a553aad9aa3 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -439,7 +439,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
>                  if (!pte_present(ptent))
>                          continue;
>
> -                page = vm_normal_page(vma, addr, ptent);
> +                page = vm_normal_lru_page(vma, addr, ptent);
>                  if (!page)
>                          continue;
>
> @@ -649,7 +649,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
>                          continue;
>                  }
>
> -                page = vm_normal_page(vma, addr, ptent);
> +                page = vm_normal_lru_page(vma, addr, ptent);
>                  if (!page)
>                          continue;
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 10259c35fde2..9677eb27dea8 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5476,7 +5476,7 @@ enum mc_target_type {
>  static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
>                                                  unsigned long addr, pte_t ptent)
>  {
> -        struct page *page = vm_normal_page(vma, addr, ptent);
> +        struct page *page = vm_normal_any_page(vma, addr, ptent);
>
>          if (!page || !page_mapped(page))
>                  return NULL;
> diff --git a/mm/memory.c b/mm/memory.c
> index c125c4969913..cff84e6a6c4b 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -565,7 +565,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
>  }
>
>  /*
> - * vm_normal_page -- This function gets the "struct page" associated with a pte.
> + * vm_normal_any_page -- This function gets the "struct page" associated with a pte.
>   *
>   * "Special" mappings do not wish to be associated with a "struct page" (either
>   * it doesn't exist, or it exists but they don't want to touch it). In this
> @@ -606,7 +606,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
>   * PFNMAP mappings in order to support COWable mappings.
>   *
>   */
> -struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
> +struct page *vm_normal_any_page(struct vm_area_struct *vma, unsigned long addr,
>                              pte_t pte)
>  {
>          unsigned long pfn = pte_pfn(pte);
> @@ -620,8 +620,6 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
>                          return NULL;
>                  if (is_zero_pfn(pfn))
>                          return NULL;
> -                if (pte_devmap(pte))
> -                        return NULL;
>
>                  print_bad_pte(vma, addr, pte, NULL);
>                  return NULL;
> @@ -661,6 +659,22 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
>          return pfn_to_page(pfn);
>  }
>
> +/*
> + * vm_normal_lru_page -- This function gets the "struct page" associated
> + * with a pte only for page cache and anon page. These pages are LRU handled.
> + */
> +struct page *vm_normal_lru_page(struct vm_area_struct *vma, unsigned long addr,
> +                            pte_t pte)
> +{
> +        struct page *page;
> +
> +        page = vm_normal_any_page(vma, addr, pte);
> +        if (is_zone_device_page(page))
> +                return NULL;
> +
> +        return page;
> +}
> +
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>                                  pmd_t pmd)
> @@ -670,7 +684,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
>          /*
>           * There is no pmd_special() but there may be special pmds, e.g.
>           * in a direct-access (dax) mapping, so let's just replicate the
> -         * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_page() here.
> +         * !CONFIG_ARCH_HAS_PTE_SPECIAL case from vm_normal_any_page() here.
>           */
>          if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
>                  if (vma->vm_flags & VM_MIXEDMAP) {
> @@ -946,7 +960,7 @@ copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
>          pte_t pte = *src_pte;
>          struct page *page;
>
> -        page = vm_normal_page(src_vma, addr, pte);
> +        page = vm_normal_any_page(src_vma, addr, pte);
>          if (page) {
>                  int retval;
>
> @@ -1358,7 +1372,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>                  if (pte_present(ptent)) {
>                          struct page *page;
>
> -                        page = vm_normal_page(vma, addr, ptent);
> +                        page = vm_normal_any_page(vma, addr, ptent);
>                          if (unlikely(zap_skip_check_mapping(details, page)))
>                                  continue;
>                          ptent = ptep_get_and_clear_full(mm, addr, pte,
> @@ -2168,7 +2182,7 @@ EXPORT_SYMBOL(vmf_insert_pfn);
>
>  static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
>  {
> -        /* these checks mirror the abort conditions in vm_normal_page */
> +        /* these checks mirror the abort conditions in vm_normal_lru_page */
>          if (vma->vm_flags & VM_MIXEDMAP)
>                  return true;
>          if (pfn_t_devmap(pfn))

If this is to match the new vm_normal_lru_page, it should replace
"if (pfn_t_devmap(pfn))" with a check that the page is not a device
page. But for that it would have to actually look up the struct page.
I'm not sure what to do about this. __vm_insert_mixed still does
something special with devmap pages, which no longer matches
vm_normal_*_page.
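Just to illustrate what that struct-page lookup could look like, here
is an untested sketch (the surrounding checks are assumed unchanged
from the current vm_mixed_ok, and note that PFNs without a struct page
would now fall through instead of returning true as the pfn_t_devmap()
check does, which is part of the open question):

        /* Untested sketch of the lookup mentioned above. */
        static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
        {
                /* these checks mirror the abort conditions in vm_normal_lru_page */
                if (vma->vm_flags & VM_MIXEDMAP)
                        return true;
                /*
                 * vm_normal_lru_page() rejects any zone-device page, not just
                 * devmap PFNs, so look up the struct page (when one exists)
                 * instead of testing pfn_t_devmap().
                 */
                if (pfn_t_has_page(pfn) &&
                    is_zone_device_page(pfn_t_to_page(pfn)))
                        return true;
                if (pfn_t_special(pfn))
                        return true;
                if (is_zero_pfn(pfn_t_to_pfn(pfn)))
                        return true;
                return false;
        }

Whether returning true here is actually right for DEVICE_COHERENT
pages, and what to do about __vm_insert_mixed's devmap handling, is
still the open question raised above.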
> @@ -2198,7 +2212,7 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
>
>          /*
>           * If we don't have pte special, then we have to use the pfn_valid()
> -         * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
> +         * based VM_MIXEDMAP scheme (see vm_normal_any_page), and thus we *must*
>           * refcount the page if pfn_valid is true (hence insert_page rather
>           * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP
>           * without pte special, it would there be refcounted as a normal page.
> @@ -2408,7 +2422,7 @@ int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
>           * There's a horrible special case to handle copy-on-write
>           * behaviour that some programs depend on. We mark the "original"
>           * un-COW'ed pages by matching them up with "vma->vm_pgoff".
> -         * See vm_normal_page() for details.
> +         * See vm_normal_any_page() for details.
>           */
>          if (is_cow_mapping(vma->vm_flags)) {
>                  if (addr != vma->vm_start || end != vma->vm_end)
> @@ -3267,7 +3281,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
>                                  mm_tlb_flush_pending(vmf->vma->vm_mm)))
>                  flush_tlb_page(vmf->vma, vmf->address);
>
> -        vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
> +        vmf->page = vm_normal_any_page(vma, vmf->address, vmf->orig_pte);
>          if (!vmf->page) {
>                  /*
>                   * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
> @@ -4364,7 +4378,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
>          old_pte = ptep_get(vmf->pte);
>          pte = pte_modify(old_pte, vma->vm_page_prot);
>
> -        page = vm_normal_page(vma, vmf->address, pte);
> +        page = vm_normal_lru_page(vma, vmf->address, pte);
>          if (!page)
>                  goto out_map;
>
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 028e8dd82b44..9962de4981d6 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -527,11 +527,11 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
>          for (; addr != end; pte++, addr += PAGE_SIZE) {
>                  if (!pte_present(*pte))
>                          continue;
> -                page = vm_normal_page(vma, addr, *pte);
> +                page = vm_normal_lru_page(vma, addr, *pte);
>                  if (!page)
>                          continue;
>                  /*
> -                 * vm_normal_page() filters out zero pages, but there might
> +                 * vm_normal_lru_page() filters out zero pages, but there might
>                   * still be PageReserved pages to skip, perhaps in a VDSO.
>                   */
>                  if (PageReserved(page))
> diff --git a/mm/migrate.c b/mm/migrate.c
> index c31d04b46a5e..17d049311b78 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1614,7 +1614,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
>                  goto out;
>
>          /* FOLL_DUMP to ignore special (like zero) pages */
> -        follflags = FOLL_GET | FOLL_DUMP;
> +        follflags = FOLL_GET | FOLL_DUMP | FOLL_LRU;
>          page = follow_page(vma, addr, follflags);
>
>          err = PTR_ERR(page);
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index 3373b535d5c9..fac1b6978361 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -154,7 +154,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>                          migrate->cpages++;
>                          goto next;
>                  }
> -                page = vm_normal_page(migrate->vma, addr, pte);
> +                page = vm_normal_any_page(migrate->vma, addr, pte);
>                  if (page && !is_zone_device_page(page) &&
>                      !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
>                          goto next;
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 8f584eddd305..52613e2f2a70 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -342,7 +342,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
>   * a non-TPH page already pinned and in the @pvec, and that it belongs to @zone.
>   *
>   * The rest of @pvec is filled by subsequent pages within the same pmd and same
> - * zone, as long as the pte's are present and vm_normal_page() succeeds. These
> + * zone, as long as the pte's are present and vm_normal_any_page() succeeds. These

The comment says vm_normal_any_page. But the function uses
vm_normal_lru_page.

Regards,
  Felix

>   * pages also get pinned.
>   *
>   * Returns the address of the next page that should be scanned. This equals
> @@ -373,7 +373,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
>                  struct page *page = NULL;
>                  pte++;
>                  if (pte_present(*pte))
> -                        page = vm_normal_page(vma, start, *pte);
> +                        page = vm_normal_lru_page(vma, start, *pte);
>                  /*
>                   * Break if page could not be obtained or the page's node+zone does not
>                   * match
> @@ -439,7 +439,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
>                   * suits munlock very well (and if somehow an abnormal page
>                   * has sneaked into the range, we won't oops here: great).
>                   */
> -                page = follow_page(vma, start, FOLL_GET | FOLL_DUMP);
> +                page = follow_page(vma, start, FOLL_GET | FOLL_DUMP | FOLL_LRU);
>
>                  if (page && !IS_ERR(page)) {
>                          if (PageTransTail(page)) {
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 0138dfcdb1d8..d236394d41d5 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -88,7 +88,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
>                          if (pte_protnone(oldpte))
>                                  continue;
>
> -                        page = vm_normal_page(vma, addr, oldpte);
> +                        page = vm_normal_lru_page(vma, addr, oldpte);
>                          if (!page || PageKsm(page))
>                                  continue;
>