Message-ID: <60125c55-9ab7-7531-e124-cb676943fea6@redhat.com>
Date: Wed, 18 Jan 2023 15:10:53 +0100
From: David Hildenbrand <david@...hat.com>
To: yang.yang29@....com.cn, akpm@...ux-foundation.org
Cc: imbrenda@...ux.ibm.com, jiang.xuexin@....com.cn,
linux-kernel@...r.kernel.org, linux-mm@...ck.org,
ran.xiaokai@....com.cn, xu.xin.sc@...il.com, xu.xin16@....com.cn
Subject: Re: [PATCH v5 2/6] ksm: support unsharing zero pages placed by KSM
On 30.12.22 02:13, yang.yang29@....com.cn wrote:
> From: xu xin <xu.xin16@....com.cn>
>
> use_zero_pages may be very useful, not only because of the cache
> colouring described in the documentation, but also because it can
> accelerate merging of empty pages when there are plenty of them (full
> of zeros), since the time spent on page-by-page comparisons
> (unstable_tree_search_insert) is saved.
>
> But when use_zero_pages is enabled, madvise(addr, len, MADV_UNMERGEABLE)
> and other ways of triggering unsharing (like writing 2 to
> /sys/kernel/mm/ksm/run) will *not* actually unshare the shared
> zeropages placed by KSM (which contradicts the MADV_UNMERGEABLE
> documentation). Because these KSM-placed zero pages are outside KSM's
> control, the related counts of KSM pages do not show how many zero
> pages were placed by KSM (these special zero pages differ from the
> initially mapped zero pages, as pages mapped into MADV_UNMERGEABLE
> areas are expected to be complete, unshared pages).
>
> To avoid blindly unsharing all shared zero pages in the applicable
> VMAs, the patch introduces a dedicated flag, ZERO_PAGE_FLAG, to mark
> the rmap_items of those shared zero pages, and guarantees that these
> rmap_items are not freed as long as the zero pages have not been
> written to, so that only the *KSM-placed* zero pages are unshared.
>
> The patch does not degrade the performance of use_zero_pages, as it
> does not change the way that feature merges empty pages.
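For reference, the behaviour described above can be exercised with a
minimal userspace sketch along these lines (assuming ksmd is enabled
via /sys/kernel/mm/ksm/run=1 and use_zero_pages=1; the buffer size and
the lack of error handling are purely illustrative):

  #include <string.h>
  #include <sys/mman.h>
  #include <unistd.h>

  int main(void)
  {
          size_t len = 16 * getpagesize();
          char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

          if (buf == MAP_FAILED)
                  return 1;

          memset(buf, 0, len);                   /* all pages full of zeros */
          madvise(buf, len, MADV_MERGEABLE);     /* let ksmd merge them */
          /*
           * After ksmd scans the area with use_zero_pages=1, these PTEs
           * map the shared zeropage placed by KSM.
           */
          madvise(buf, len, MADV_UNMERGEABLE);   /* expected to unshare */
          return 0;
  }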
>
> Signed-off-by: xu xin <xu.xin16@....com.cn>
> Reported-by: David Hildenbrand <david@...hat.com>
> Cc: Claudio Imbrenda <imbrenda@...ux.ibm.com>
> Cc: Xuexin Jiang <jiang.xuexin@....com.cn>
> Reviewed-by: Xiaokai Ran <ran.xiaokai@....com.cn>
> Reviewed-by: Yang Yang <yang.yang29@....com.cn>
[...]
>
> /* The stable and unstable tree heads */
> static struct rb_root one_stable_tree[1] = { RB_ROOT };
> @@ -420,6 +421,11 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
> return atomic_read(&mm->mm_users) == 0;
> }
>
> +enum break_ksm_pmd_entry_return_flag {
> + HAVE_KSM_PAGE = 1,
> + HAVE_ZERO_PAGE
> +};
Why use flags if the two conditions are mutually exclusive?
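If they really are mutually exclusive, plain return values would seem
to be enough -- something like the following sketch (the naming is just
for illustration):

  enum break_ksm_pmd_entry_ret {
          WALK_NOTHING = 0,
          WALK_KSM_PAGE,
          WALK_ZERO_PAGE,
  };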
> +
> static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next,
> struct mm_walk *walk)
> {
> @@ -427,6 +433,7 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
> spinlock_t *ptl;
> pte_t *pte;
> int ret;
> + bool is_zero_page = false;
>
> if (pmd_leaf(*pmd) || !pmd_present(*pmd))
> return 0;
> @@ -434,6 +441,8 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
> pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
> if (pte_present(*pte)) {
> page = vm_normal_page(walk->vma, addr, *pte);
> + if (!page)
> + is_zero_page = is_zero_pfn(pte_pfn(*pte));
> } else if (!pte_none(*pte)) {
> swp_entry_t entry = pte_to_swp_entry(*pte);
>
> @@ -444,7 +453,14 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
> if (is_migration_entry(entry))
> page = pfn_swap_entry_to_page(entry);
> }
> - ret = page && PageKsm(page);
> +
> + if (page && PageKsm(page))
> + ret = HAVE_KSM_PAGE;
> + else if (is_zero_page)
> + ret = HAVE_ZERO_PAGE;
> + else
> + ret = 0;
> +
> pte_unmap_unlock(pte, ptl);
> return ret;
> }
> @@ -466,19 +482,22 @@ static const struct mm_walk_ops break_ksm_ops = {
> * of the process that owns 'vma'. We also do not want to enforce
> * protection keys here anyway.
> */
> -static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
> +static int break_ksm(struct vm_area_struct *vma, unsigned long addr,
> + bool unshare_zero_page)
> {
> vm_fault_t ret = 0;
>
> do {
> - int ksm_page;
> + int walk_result;
>
> cond_resched();
> - ksm_page = walk_page_range_vma(vma, addr, addr + 1,
> + walk_result = walk_page_range_vma(vma, addr, addr + 1,
> &break_ksm_ops, NULL);
> - if (WARN_ON_ONCE(ksm_page < 0))
> - return ksm_page;
> - if (!ksm_page)
> + if (WARN_ON_ONCE(walk_result < 0))
> + return walk_result;
> + if (!walk_result)
> + return 0;
> + if (walk_result == HAVE_ZERO_PAGE && !unshare_zero_page)
> return 0;
> ret = handle_mm_fault(vma, addr,
> FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
> @@ -539,7 +558,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
> mmap_read_lock(mm);
> vma = find_mergeable_vma(mm, addr);
> if (vma)
> - break_ksm(vma, addr);
> + break_ksm(vma, addr, false);
> mmap_read_unlock(mm);
> }
>
> @@ -764,6 +783,30 @@ static struct page *get_ksm_page(struct ksm_stable_node *stable_node,
> return NULL;
> }
>
> +/*
> + * Clear the rmap_item's ZERO_PAGE_FLAG.
> + * Called when unsharing or when a zero page is written to.
> + */
> +static inline void clean_rmap_item_zero_flag(struct ksm_rmap_item *rmap_item)
> +{
> + if (rmap_item->address & ZERO_PAGE_FLAG)
> + rmap_item->address &= PAGE_MASK;
> +}
> +
> +/* Only called when rmap_item is going to be freed */
> +static inline void unshare_zero_pages(struct ksm_rmap_item *rmap_item)
> +{
> + struct vm_area_struct *vma;
> +
> + if (rmap_item->address & ZERO_PAGE_FLAG) {
> + vma = vma_lookup(rmap_item->mm, rmap_item->address);
> + if (vma && !ksm_test_exit(rmap_item->mm))
> + break_ksm(vma, rmap_item->address, true);
> + }
> + /* Put at last. */
> + clean_rmap_item_zero_flag(rmap_item);
> +}
> +
> /*
> * Removing rmap_item from stable or unstable tree.
> * This function will clean the information from the stable/unstable tree.
> @@ -824,6 +867,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
> struct ksm_rmap_item *rmap_item = *rmap_list;
> *rmap_list = rmap_item->rmap_list;
> remove_rmap_item_from_tree(rmap_item);
> + unshare_zero_pages(rmap_item);
> free_rmap_item(rmap_item);
> }
> }
> @@ -853,7 +897,7 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
> if (signal_pending(current))
> err = -ERESTARTSYS;
> else
> - err = break_ksm(vma, addr);
> + err = break_ksm(vma, addr, false);
> }
MADV_UNMERGEABLE -> unmerge_ksm_pages() will never unshare the shared
zeropage? I thought the patch description mentions that that is one of
the goals?
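If MADV_UNMERGEABLE is supposed to unshare KSM-placed zeropages as
well, I would have expected something along the lines of (untested
sketch):

  -		err = break_ksm(vma, addr, false);
  +		err = break_ksm(vma, addr, true);

or some other way for this caller to request the unsharing explicitly.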
--
Thanks,
David / dhildenb