[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <65a9b473-9696-04ae-862b-8d4947c22703@nvidia.com>
Date: Fri, 5 Aug 2022 01:15:41 -0700
From: John Hubbard <jhubbard@...dia.com>
To: Alistair Popple <apopple@...dia.com>, linux-mm@...ck.org
Cc: akpm@...ux-foundation.org, linux-kernel@...r.kernel.org,
"Sierra Guiza, Alejandro (Alex)" <alex.sierra@....com>,
Chaitanya Kulkarni <kch@...dia.com>,
Dan Williams <dan.j.williams@...el.com>,
Felix Kuehling <Felix.Kuehling@....com>,
Jason Gunthorpe <jgg@...dia.com>,
Logan Gunthorpe <logang@...tatee.com>,
Miaohe Lin <linmiaohe@...wei.com>,
Muchun Song <songmuchun@...edance.com>,
Ralph Campbell <rcampbell@...dia.com>,
David Hildenbrand <david@...hat.com>
Subject: Re: [PATCH v2 2/2] mm/gup.c: Refactor
check_and_migrate_movable_pages()
On 8/4/22 23:29, Alistair Popple wrote:
> When pinning pages with FOLL_LONGTERM check_and_migrate_movable_pages()
> is called to migrate pages out of zones which should not contain any
> longterm pinned pages.
>
> When migration succeeds all pages will have been unpinned so pinning
> needs to be retried. Migration can also fail, in which case the pages
> will also have been unpinned but the operation should not be retried. If
> all pages are in the correct zone nothing will be unpinned and no retry
> is required.
>
> The logic in check_and_migrate_movable_pages() tracks unnecessary state
> and the return codes for each case are difficult to follow. Refactor the
> code to clean this up. No behaviour change is intended.
>
> Signed-off-by: Alistair Popple <apopple@...dia.com>
>
> ---
>
> Changes for v2:
>
> - Split into different functions as suggested by John.
> - Made error handling more conventional as requested by Jason.
>
> Personally I'm not entirely convinced the conventional approach to error
> handling is easier to follow here but have left it in for feedback as I
> might be getting too familiar with the code.
>
> Originally posted as "mm/gup.c: Simplify and fix
> check_and_migrate_movable_pages() return codes"[1].
>
> Changes from that version:
>
> - Restore the original isolation failure behaviour and don't fail the
> pup. Instead retry indefinitely.
> - Unpin all pages on retry or failure rather than just failure.
>
> [1] https://lore.kernel.org/linux-mm/814dee5d3aadd38c3370eaaf438ba7eee9bf9d2b.1659399696.git-series.apopple@nvidia.com/
> ---
> mm/gup.c | 159 ++++++++++++++++++++++++++++++--------------------------
> 1 file changed, 88 insertions(+), 71 deletions(-)
>
> diff --git a/mm/gup.c b/mm/gup.c
> index e26ccc0..60cb30a 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1900,20 +1900,14 @@ struct page *get_dump_page(unsigned long addr)
> #endif /* CONFIG_ELF_CORE */
>
> #ifdef CONFIG_MIGRATION
> -/*
> - * Check whether all pages are pinnable, if so return number of pages. If some
> - * pages are not pinnable, migrate them, and unpin all pages. Return zero if
> - * pages were migrated, or if some pages were not successfully isolated.
> - * Return negative error if migration fails.
> - */
> -static long check_and_migrate_movable_pages(unsigned long nr_pages,
> - struct page **pages)
Let's add:
/*
* Returns the number of collected pages. Return value is always >= 0.
*/
> +static int collect_unpinnable_pages(struct list_head *movable_page_list,
> + unsigned long nr_pages,
> + struct page **pages)
> {
> - unsigned long isolation_error_count = 0, i;
> + unsigned long i;
> + int collected = 0;
This should be an unsigned long.
> struct folio *prev_folio = NULL;
> - LIST_HEAD(movable_page_list);
> - bool drain_allow = true, coherent_pages = false;
> - int ret = 0;
> + bool drain_allow = true;
>
> for (i = 0; i < nr_pages; i++) {
> struct folio *folio = page_folio(pages[i]);
> @@ -1922,43 +1916,16 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
> continue;
> prev_folio = folio;
>
> - /*
> - * Device coherent pages are managed by a driver and should not
> - * be pinned indefinitely as it prevents the driver moving the
> - * page. So when trying to pin with FOLL_LONGTERM instead try
> - * to migrate the page out of device memory.
> - */
> - if (folio_is_device_coherent(folio)) {
> - /*
> - * We always want a new GUP lookup with device coherent
> - * pages.
> - */
> - pages[i] = 0;
> - coherent_pages = true;
> -
> - /*
> - * Migration will fail if the page is pinned, so convert
> - * the pin on the source page to a normal reference.
> - */
> - get_page(&folio->page);
> - unpin_user_page(&folio->page);
> + if (folio_is_longterm_pinnable(folio))
> + continue;
>
> - ret = migrate_device_coherent_page(&folio->page);
> - if (ret)
> - goto unpin_pages;
> + collected++;
>
> + if (folio_is_device_coherent(folio))
> continue;
> - }
>
> - if (folio_is_longterm_pinnable(folio))
> - continue;
> - /*
> - * Try to move out any movable page before pinning the range.
> - */
> if (folio_test_hugetlb(folio)) {
> - if (isolate_hugetlb(&folio->page,
> - &movable_page_list))
> - isolation_error_count++;
> + isolate_hugetlb(&folio->page, movable_page_list);
> continue;
> }
>
> @@ -1967,59 +1934,109 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
> drain_allow = false;
> }
>
> - if (folio_isolate_lru(folio)) {
> - isolation_error_count++;
> + if (!folio_isolate_lru(folio))
> continue;
> - }
> - list_add_tail(&folio->lru, &movable_page_list);
> +
> + list_add_tail(&folio->lru, movable_page_list);
> node_stat_mod_folio(folio,
> NR_ISOLATED_ANON + folio_is_file_lru(folio),
> folio_nr_pages(folio));
> }
>
> - if (!list_empty(&movable_page_list) || isolation_error_count ||
> - coherent_pages)
> - goto unpin_pages;
> + return collected;
> +}
>
> - /*
> - * If list is empty, and no isolation errors, means that all pages are
> - * in the correct zone.
> - */
> - return nr_pages;
Let's add approximately (maybe there is better wording):
/*
* Returns zero for success, or -errno for failure (or partial success).
*/
> +static int migrate_unpinnable_pages(struct list_head *movable_page_list,
> + unsigned long nr_pages,
> + struct page **pages)
> +{
> + int ret, i;
There is an uneasy mixing of int and long and unsigned long. Instead, this
should use unsigned long whenever it deals with nr_pages. Like here.
>
> -unpin_pages:
> - /*
> - * pages[i] might be NULL if any device coherent pages were found.
> - */
> for (i = 0; i < nr_pages; i++) {
> - if (!pages[i])
> + struct folio *folio = page_folio(pages[i]);
> +
> + if (folio_is_device_coherent(folio)) {
> + /*
> + * Migration will fail if the page is pinned, so convert
> + * the pin on the source page to a normal reference.
> + */
> + pages[i] = NULL;
> + get_page(&folio->page);
> + unpin_user_page(&folio->page);
> +
> + if (migrate_device_coherent_page(&folio->page)) {
> + ret = -EBUSY;
> + goto err;
> + }
> +
> continue;
> + }
>
> + /*
> + * We can't migrate pages with unexpected references, so drop
> + * the reference obtained by get_user_pages().
The get_user_pages() reference is confusing, since we only handle FOLL_PIN
here. It's hard to connect the comment to the code. Maybe a more precise
pointer to where the reference was taken would help.
> + * folio_isolate_lru() takes a reference so the page won't be
> + * freed.
Also confusing because it's difficult to connect the comment back to the code.
Maybe mention where folio_isolate_lru() is called from in this case?
> + */
> unpin_user_page(pages[i]);
> + pages[i] = NULL;
Is this correct? The loop covers all of nr_pages, so we are setting every
pages[i] = NULL, for non-DEV_COHERENT cases. This seems wrong.
> }
>
> - if (!list_empty(&movable_page_list)) {
> + if (!list_empty(movable_page_list)) {
> struct migration_target_control mtc = {
> .nid = NUMA_NO_NODE,
> .gfp_mask = GFP_USER | __GFP_NOWARN,
> };
>
> - ret = migrate_pages(&movable_page_list, alloc_migration_target,
> - NULL, (unsigned long)&mtc, MIGRATE_SYNC,
> - MR_LONGTERM_PIN, NULL);
> - if (ret > 0) /* number of pages not migrated */
> + if (migrate_pages(movable_page_list, alloc_migration_target,
> + NULL, (unsigned long)&mtc, MIGRATE_SYNC,
> + MR_LONGTERM_PIN, NULL)) {
> ret = -ENOMEM;
> + goto err;
> + }
> }
>
> - if (ret && !list_empty(&movable_page_list))
> - putback_movable_pages(&movable_page_list);
> + putback_movable_pages(movable_page_list);
> +
> + return 0;
> +
> +err:
> + for (i = 0; i < nr_pages; i++)
> + if (pages[i])
> + unpin_user_page(pages[i]);
> + putback_movable_pages(movable_page_list);
> +
> return ret;
> }
> +
> +/*
> + * Check whether all pages are pinnable. If some pages are not pinnable migrate
> + * them and unpin all the pages. Returns -EAGAIN if pages were unpinned or zero
> + * if all pages are pinnable and in the right zone. Other errors indicate
> + * migration failure.
> + */
> +static long check_and_migrate_movable_pages(unsigned long nr_pages,
> + struct page **pages)
> +{
> + int ret;
> + long collected;
That should be unsigned long, too.
> + LIST_HEAD(movable_page_list);
> +
> + collected = collect_unpinnable_pages(&movable_page_list, nr_pages, pages);
> + if (!collected)
> + return 0;
> +
> + ret = migrate_unpinnable_pages(&movable_page_list, nr_pages, pages);
> + if (!ret)
> + return -EAGAIN;
> + else
> + return ret;
> +}
> #else
> static long check_and_migrate_movable_pages(unsigned long nr_pages,
> struct page **pages)
> {
> - return nr_pages;
> + return 0;
> }
> #endif /* CONFIG_MIGRATION */
>
> @@ -2049,10 +2066,10 @@ static long __gup_longterm_locked(struct mm_struct *mm,
>
> WARN_ON_ONCE(!(gup_flags & FOLL_PIN));
> rc = check_and_migrate_movable_pages(rc, pages);
> - } while (!rc);
> + } while (rc == -EAGAIN);
> memalloc_pin_restore(flags);
>
> - return rc;
> + return rc ? rc : nr_pages;
This seems to add some holes in the handling of error cases. If -EBUSY
or -ENOMEM happens, then we return an -errno, rather than the number
of pinned pages. But some pages may have been pinned.
Previously it just looped if there was any error, so that couldn't
happen.
thanks,
--
John Hubbard
NVIDIA
> }
>
> static bool is_valid_gup_flags(unsigned int gup_flags)
Powered by blists - more mailing lists