lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 5 Aug 2022 01:15:41 -0700
From:   John Hubbard <jhubbard@...dia.com>
To:     Alistair Popple <apopple@...dia.com>, linux-mm@...ck.org
Cc:     akpm@...ux-foundation.org, linux-kernel@...r.kernel.org,
        "Sierra Guiza, Alejandro (Alex)" <alex.sierra@....com>,
        Chaitanya Kulkarni <kch@...dia.com>,
        Dan Williams <dan.j.williams@...el.com>,
        Felix Kuehling <Felix.Kuehling@....com>,
        Jason Gunthorpe <jgg@...dia.com>,
        Logan Gunthorpe <logang@...tatee.com>,
        Miaohe Lin <linmiaohe@...wei.com>,
        Muchun Song <songmuchun@...edance.com>,
        Ralph Campbell <rcampbell@...dia.com>,
        David Hildenbrand <david@...hat.com>
Subject: Re: [PATCH v2 2/2] mm/gup.c: Refactor
 check_and_migrate_movable_pages()

On 8/4/22 23:29, Alistair Popple wrote:
> When pinning pages with FOLL_LONGTERM check_and_migrate_movable_pages()
> is called to migrate pages out of zones which should not contain any
> longterm pinned pages.
> 
> When migration succeeds all pages will have been unpinned so pinning
> needs to be retried. Migration can also fail, in which case the pages
> will also have been unpinned but the operation should not be retried. If
> all pages are in the correct zone nothing will be unpinned and no retry
> is required.
> 
> The logic in check_and_migrate_movable_pages() tracks unnecessary state
> and the return codes for each case are difficult to follow. Refactor the
> code to clean this up. No behaviour change is intended.
> 
> Signed-off-by: Alistair Popple <apopple@...dia.com>
> 
> ---
> 
> Changes for v2:
> 
>   - Split into different functions as suggested by John.
>   - Made error handling more conventional as requested by Jason.
> 
> Personally I'm not entirely convinced the conventional approach to error
> handling is easier to follow here but have left it in for feedback as I
> might be getting too familiar with the code.
> 
> Originally posted as "mm/gup.c: Simplify and fix
> check_and_migrate_movable_pages() return codes"[1].
> 
> Changes from that version:
> 
>   - Restore the original isolation failure behaviour and don't fail the
>     pup. Instead retry indefinitely.
>   - Unpin all pages on retry or failure rather than just failure.
> 
> [1] https://lore.kernel.org/linux-mm/814dee5d3aadd38c3370eaaf438ba7eee9bf9d2b.1659399696.git-series.apopple@nvidia.com/
> ---
>   mm/gup.c | 159 ++++++++++++++++++++++++++++++--------------------------
>   1 file changed, 88 insertions(+), 71 deletions(-)
> 
> diff --git a/mm/gup.c b/mm/gup.c
> index e26ccc0..60cb30a 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1900,20 +1900,14 @@ struct page *get_dump_page(unsigned long addr)
>   #endif /* CONFIG_ELF_CORE */
>   
>   #ifdef CONFIG_MIGRATION
> -/*
> - * Check whether all pages are pinnable, if so return number of pages.  If some
> - * pages are not pinnable, migrate them, and unpin all pages. Return zero if
> - * pages were migrated, or if some pages were not successfully isolated.
> - * Return negative error if migration fails.
> - */
> -static long check_and_migrate_movable_pages(unsigned long nr_pages,
> -					    struct page **pages)

Let's add:

/*
  * Returns the number of collected pages. Return value is always >= 0.
  */
> +static int collect_unpinnable_pages(struct list_head *movable_page_list,
> +				    unsigned long nr_pages,
> +				    struct page **pages)
>   {
> -	unsigned long isolation_error_count = 0, i;
> +	unsigned long i;
> +	int collected = 0;

This should be an unsigned long.


>   	struct folio *prev_folio = NULL;
> -	LIST_HEAD(movable_page_list);
> -	bool drain_allow = true, coherent_pages = false;
> -	int ret = 0;
> +	bool drain_allow = true;
>   
>   	for (i = 0; i < nr_pages; i++) {
>   		struct folio *folio = page_folio(pages[i]);
> @@ -1922,43 +1916,16 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>   			continue;
>   		prev_folio = folio;
>   
> -		/*
> -		 * Device coherent pages are managed by a driver and should not
> -		 * be pinned indefinitely as it prevents the driver moving the
> -		 * page. So when trying to pin with FOLL_LONGTERM instead try
> -		 * to migrate the page out of device memory.
> -		 */
> -		if (folio_is_device_coherent(folio)) {
> -			/*
> -			 * We always want a new GUP lookup with device coherent
> -			 * pages.
> -			 */
> -			pages[i] = 0;
> -			coherent_pages = true;
> -
> -			/*
> -			 * Migration will fail if the page is pinned, so convert
> -			 * the pin on the source page to a normal reference.
> -			 */
> -			get_page(&folio->page);
> -			unpin_user_page(&folio->page);
> +		if (folio_is_longterm_pinnable(folio))
> +			continue;
>   
> -			ret = migrate_device_coherent_page(&folio->page);
> -			if (ret)
> -				goto unpin_pages;
> +		collected++;
>   
> +		if (folio_is_device_coherent(folio))
>   			continue;
> -		}
>   
> -		if (folio_is_longterm_pinnable(folio))
> -			continue;
> -		/*
> -		 * Try to move out any movable page before pinning the range.
> -		 */
>   		if (folio_test_hugetlb(folio)) {
> -			if (isolate_hugetlb(&folio->page,
> -						&movable_page_list))
> -				isolation_error_count++;
> +			isolate_hugetlb(&folio->page, movable_page_list);
>   			continue;
>   		}
>   
> @@ -1967,59 +1934,109 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>   			drain_allow = false;
>   		}
>   
> -		if (folio_isolate_lru(folio)) {
> -			isolation_error_count++;
> +		if (!folio_isolate_lru(folio))
>   			continue;
> -		}
> -		list_add_tail(&folio->lru, &movable_page_list);
> +
> +		list_add_tail(&folio->lru, movable_page_list);
>   		node_stat_mod_folio(folio,
>   				    NR_ISOLATED_ANON + folio_is_file_lru(folio),
>   				    folio_nr_pages(folio));
>   	}
>   
> -	if (!list_empty(&movable_page_list) || isolation_error_count ||
> -	    coherent_pages)
> -		goto unpin_pages;
> +	return collected;
> +}
>   
> -	/*
> -	 * If list is empty, and no isolation errors, means that all pages are
> -	 * in the correct zone.
> -	 */
> -	return nr_pages;

Let's add approximately (maybe there is better wording):

/*
  * Returns zero for success, or -errno for failure (or partial success).
  */

> +static int migrate_unpinnable_pages(struct list_head *movable_page_list,
> +				    unsigned long nr_pages,
> +				    struct page **pages)
> +{
> +	int ret, i;

There is an uneasy mixing of int and long and unsigned long. Instead, this
should use unsigned long whenever it deals with nr_pages. Like here.

>   
> -unpin_pages:
> -	/*
> -	 * pages[i] might be NULL if any device coherent pages were found.
> -	 */
>   	for (i = 0; i < nr_pages; i++) {
> -		if (!pages[i])
> +		struct folio *folio = page_folio(pages[i]);
> +
> +		if (folio_is_device_coherent(folio)) {
> +			/*
> +			 * Migration will fail if the page is pinned, so convert
> +			 * the pin on the source page to a normal reference.
> +			 */
> +			pages[i] = NULL;
> +			get_page(&folio->page);
> +			unpin_user_page(&folio->page);
> +
> +			if (migrate_device_coherent_page(&folio->page)) {
> +				ret = -EBUSY;
> +				goto err;
> +			}
> +
>   			continue;
> +		}
>   
> +		/*
> +		 * We can't migrate pages with unexpected references, so drop
> +		 * the reference obtained by get_user_pages().

The get_user_pages() reference is confusing, since we only handle FOLL_PIN
here. It's hard to connect the comment to the code. Maybe a more precise
pointer to where the reference was taken would help.

> +		 * folio_isolate_lru() takes a reference so the page won't be
> +		 * freed.

Also confusing because it's difficult to connect the comment back to the code.
Maybe mention where folio_isolate_lru() is called from in this case?


> +		 */
>   		unpin_user_page(pages[i]);
> +		pages[i] = NULL;

Is this correct? The loop covers all of nr_pages, so we are setting every
pages[i] = NULL, for non-DEV_COHERENT cases. This seems wrong.

>   	}
>   
> -	if (!list_empty(&movable_page_list)) {
> +	if (!list_empty(movable_page_list)) {
>   		struct migration_target_control mtc = {
>   			.nid = NUMA_NO_NODE,
>   			.gfp_mask = GFP_USER | __GFP_NOWARN,
>   		};
>   
> -		ret = migrate_pages(&movable_page_list, alloc_migration_target,
> -				    NULL, (unsigned long)&mtc, MIGRATE_SYNC,
> -				    MR_LONGTERM_PIN, NULL);
> -		if (ret > 0) /* number of pages not migrated */
> +		if (migrate_pages(movable_page_list, alloc_migration_target,
> +				  NULL, (unsigned long)&mtc, MIGRATE_SYNC,
> +				  MR_LONGTERM_PIN, NULL)) {
>   			ret = -ENOMEM;
> +			goto err;
> +		}
>   	}
>   
> -	if (ret && !list_empty(&movable_page_list))
> -		putback_movable_pages(&movable_page_list);
> +	putback_movable_pages(movable_page_list);
> +
> +	return 0;
> +
> +err:
> +	for (i = 0; i < nr_pages; i++)
> +		if (pages[i])
> +			unpin_user_page(pages[i]);
> +	putback_movable_pages(movable_page_list);
> +
>   	return ret;
>   }
> +
> +/*
> + * Check whether all pages are pinnable. If some pages are not pinnable migrate
> + * them and unpin all the pages. Returns -EAGAIN if pages were unpinned or zero
> + * if all pages are pinnable and in the right zone. Other errors indicate
> + * migration failure.
> + */
> +static long check_and_migrate_movable_pages(unsigned long nr_pages,
> +					    struct page **pages)
> +{
> +	int ret;
> +	long collected;

That should be unsigned long, too.

> +	LIST_HEAD(movable_page_list);
> +
> +	collected = collect_unpinnable_pages(&movable_page_list, nr_pages, pages);
> +	if (!collected)
> +		return 0;
> +
> +	ret = migrate_unpinnable_pages(&movable_page_list, nr_pages, pages);
> +	if (!ret)
> +		return -EAGAIN;
> +	else
> +		return ret;
> +}
>   #else
>   static long check_and_migrate_movable_pages(unsigned long nr_pages,
>   					    struct page **pages)
>   {
> -	return nr_pages;
> +	return 0;
>   }
>   #endif /* CONFIG_MIGRATION */
>   
> @@ -2049,10 +2066,10 @@ static long __gup_longterm_locked(struct mm_struct *mm,
>   
>   		WARN_ON_ONCE(!(gup_flags & FOLL_PIN));
>   		rc = check_and_migrate_movable_pages(rc, pages);
> -	} while (!rc);
> +	} while (rc == -EAGAIN);
>   	memalloc_pin_restore(flags);
>   
> -	return rc;
> +	return rc ? rc : nr_pages;

This seems to add some holes in the handling of error cases. If -EBUSY
or -ENOMEM happens, then we return an -errno, rather than the number
of pinned pages. But some pages may have been pinned.

Previously it just looped if there was any error, so that couldn't
happen.


thanks,
-- 
John Hubbard
NVIDIA
>   }
>   
>   static bool is_valid_gup_flags(unsigned int gup_flags)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ