Message-ID: <0bd084b6-27be-4479-c22d-ba72923f1b78@os.amperecomputing.com>
Date:   Wed, 20 Sep 2023 16:36:45 -0700
From:   Yang Shi <yang@...amperecomputing.com>
To:     hughd@...gle.com, surenb@...gle.com, willy@...radead.org,
        mhocko@...e.com, vbabka@...e.cz, osalvador@...e.de,
        aquini@...hat.com, kirill@...temov.name, rientjes@...gle.com
Cc:     linux-mm@...ck.org, linux-kernel@...r.kernel.org,
        stable@...r.kernel.org
Subject: Re: [PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and
 MPOL_MF_MOVE are specified



On 9/20/23 3:32 PM, Yang Shi wrote:
> When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, the
> kernel should attempt to migrate all existing pages, and return -EIO if
> there is any misplaced or unmovable page.  Then commit 6f4576e3687b
> ("mempolicy: apply page table walker on queue_pages_range()") messed up
> the return value and no longer broke the VMA scan early when MPOL_MF_STRICT
> alone was specified.  The return value problem was fixed by commit a7f40cfe3b7a
> ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified"),
> but that commit breaks out of the VMA walk early when an unmovable page is
> met, which may cause some pages not to be migrated as expected.
>
> The code should conceptually do:
>
>   if (MPOL_MF_MOVE|MOVEALL)
>       scan all vmas
>       try to migrate the existing pages
>       return success
>   else if (MPOL_MF_MOVE* | MPOL_MF_STRICT)
>       scan all vmas
>       try to migrate the existing pages
>       return -EIO if unmovable or migration failed
>   else /* MPOL_MF_STRICT alone */
>       break early if meets unmovable and don't call mbind_range() at all
>   else /* none of those flags */
>       check the ranges in test_walk, EFAULT without mbind_range() if discontig.
>
> Fix the behavior.

Forgot the Fixes tag:

Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified")
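
For reference, a minimal userspace sketch of the contract being restored
(not part of the patch; it assumes libnuma's <numaif.h> wrapper, builds
with "cc test.c -lnuma", and the target node 0 is just illustrative):

#include <numaif.h>		/* mbind(), MPOL_* -- from libnuma */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	size_t len = 16 * page;
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	unsigned long nodemask = 1UL << 0;	/* bind to node 0 */

	if (buf == MAP_FAILED)
		return 1;
	memset(buf, 0, len);	/* fault the pages in first */

	/*
	 * With MPOL_MF_MOVE | MPOL_MF_STRICT the kernel must scan the
	 * whole range, migrate every page it can, and return -EIO
	 * (errno == EIO) if any page was misplaced or unmovable --
	 * not bail out at the first unmovable page.
	 */
	if (mbind(buf, len, MPOL_BIND, &nodemask,
		  sizeof(nodemask) * 8, MPOL_MF_MOVE | MPOL_MF_STRICT))
		perror("mbind");

	munmap(buf, len);
	return 0;
}

Before this fix, one unmovable page could end the VMA walk early, so
movable pages later in the range were silently left in place even
though the caller got -EIO.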

>
> Cc: Hugh Dickins <hughd@...gle.com>
> Cc: Suren Baghdasaryan <surenb@...gle.com>
> Cc: Matthew Wilcox <willy@...radead.org>
> Cc: Michal Hocko <mhocko@...e.com>
> Cc: Vlastimil Babka <vbabka@...e.cz>
> Cc: Oscar Salvador <osalvador@...e.de>
> Cc: Rafael Aquini <aquini@...hat.com>
> Cc: Kirill A. Shutemov <kirill@...temov.name>
> Cc: David Rientjes <rientjes@...gle.com>
> Cc: <stable@...r.kernel.org> v4.9+
> Signed-off-by: Yang Shi <yang@...amperecomputing.com>
> ---
>   mm/mempolicy.c | 39 +++++++++++++++++++--------------------
>   1 file changed, 19 insertions(+), 20 deletions(-)
>
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 42b5567e3773..f1b00d6ac7ee 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -426,6 +426,7 @@ struct queue_pages {
>   	unsigned long start;
>   	unsigned long end;
>   	struct vm_area_struct *first;
> +	bool has_unmovable;
>   };
>   
>   /*
> @@ -446,9 +447,8 @@ static inline bool queue_folio_required(struct folio *folio,
>   /*
>    * queue_folios_pmd() has three possible return values:
>    * 0 - folios are placed on the right node or queued successfully, or
> - *     special page is met, i.e. huge zero page.
> - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
> - *     specified.
> + *     special page is met, i.e. zero page, or unmovable page is found
> + *     but continue walking (indicated by queue_pages.has_unmovable).
>    * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
>    *        existing folio was already on a node that does not follow the
>    *        policy.
> @@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
>   	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
>   		if (!vma_migratable(walk->vma) ||
>   		    migrate_folio_add(folio, qp->pagelist, flags)) {
> -			ret = 1;
> +			qp->has_unmovable = true;
>   			goto unlock;
>   		}
>   	} else
> @@ -495,9 +495,8 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
>    *
>    * queue_folios_pte_range() has three possible return values:
>    * 0 - folios are placed on the right node or queued successfully, or
> - *     special page is met, i.e. zero page.
> - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
> - *     specified.
> + *     special page is met, i.e. zero page, or unmovable page is found
> + *     but continue walking (indicated by queue_pages.has_unmovable).
>    * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already
>    *        on a node that does not follow the policy.
>    */
> @@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
>   	struct folio *folio;
>   	struct queue_pages *qp = walk->private;
>   	unsigned long flags = qp->flags;
> -	bool has_unmovable = false;
>   	pte_t *pte, *mapped_pte;
>   	pte_t ptent;
>   	spinlock_t *ptl;
> @@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
>   		if (!queue_folio_required(folio, qp))
>   			continue;
>   		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
> -			/* MPOL_MF_STRICT must be specified if we get here */
> -			if (!vma_migratable(vma)) {
> -				has_unmovable = true;
> -				break;
> -			}
> +			/*
> +			 * MPOL_MF_STRICT must be specified if we get here.
> +			 * Continue walking vmas due to MPOL_MF_MOVE* flags.
> +			 */
> +			if (!vma_migratable(vma))
> +				qp->has_unmovable = true;
>   
>   			/*
>   			 * Do not abort immediately since there may be
> @@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
>   			 * need migrate other LRU pages.
>   			 */
>   			if (migrate_folio_add(folio, qp->pagelist, flags))
> -				has_unmovable = true;
> +				qp->has_unmovable = true;
>   		} else
>   			break;
>   	}
>   	pte_unmap_unlock(mapped_pte, ptl);
>   	cond_resched();
>   
> -	if (has_unmovable)
> -		return 1;
> -
>   	return addr != end ? -EIO : 0;
>   }
>   
> @@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
>   		 * Detecting misplaced folio but allow migrating folios which
>   		 * have been queued.
>   		 */
> -		ret = 1;
> +		qp->has_unmovable = true;
>   		goto unlock;
>   	}
>   
> @@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
>   			 * Failed to isolate folio but allow migrating pages
>   			 * which have been queued.
>   			 */
> -			ret = 1;
> +			qp->has_unmovable = true;
>   	}
>   unlock:
>   	spin_unlock(ptl);
> @@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
>   		.start = start,
>   		.end = end,
>   		.first = NULL,
> +		.has_unmovable = false,
>   	};
>   	const struct mm_walk_ops *ops = lock_vma ?
>   			&queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
>   
>   	err = walk_page_range(mm, start, end, ops, &qp);
>   
> +	if (qp.has_unmovable)
> +		err = 1;
>   	if (!qp.first)
>   		/* whole range in hole */
>   		err = -EFAULT;
> @@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start, unsigned long len,
>   				putback_movable_pages(&pagelist);
>   		}
>   
> -		if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
> +		if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT))
>   			err = -EIO;
>   	} else {
>   up_out:
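
The shape of the fix, boiled down: rather than aborting the page-table
walk with a magic return value of 1 the moment an unmovable folio shows
up, the walk records the fact in the shared queue_pages state, keeps
scanning so every movable page still gets queued, and queue_pages_range()
turns the flag into an error only once the whole range has been walked.
A standalone sketch of that pattern (names and the toy walker are
hypothetical, not kernel code):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define MF_MOVE   0x1
#define MF_STRICT 0x2

struct walk_state {
	unsigned flags;
	bool has_unmovable;	/* sticky: set once, checked after the walk */
	int queued;
};

static void visit_page(struct walk_state *ws, bool movable)
{
	if (!(ws->flags & MF_MOVE))
		return;
	if (!movable)
		ws->has_unmovable = true;	/* note it, but keep walking */
	else
		ws->queued++;			/* this page still gets queued */
}

static int walk_range(struct walk_state *ws, const bool *pages, int n)
{
	int i;

	for (i = 0; i < n; i++)
		visit_page(ws, pages[i]);

	/* Decide only after the whole range has been scanned. */
	if (ws->has_unmovable && (ws->flags & MF_STRICT))
		return -EIO;
	return 0;
}

int main(void)
{
	bool pages[] = { true, false, true, true };	/* second page unmovable */
	struct walk_state ws = { .flags = MF_MOVE | MF_STRICT };
	int err = walk_range(&ws, pages, 4);

	printf("queued %d, err %d\n", ws.queued, err);	/* queued 3, err -5 (-EIO) */
	return 0;
}

With MF_STRICT set the caller still sees -EIO, but only after all four
pages were visited and the three movable ones were queued, which matches
the mbind() behavior the commit message describes.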
