lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Yvpg6odyDsXrjw5i@dhcp22.suse.cz>
Date:   Mon, 15 Aug 2022 17:06:18 +0200
From:   Michal Hocko <mhocko@...e.com>
To:     Charan Teja Kalla <quic_charante@...cinc.com>
Cc:     akpm@...ux-foundation.org, david@...hat.com,
        pasha.tatashin@...een.com, sieberf@...zon.com, shakeelb@...gle.com,
        sjpark@...zon.de, dhowells@...hat.com, willy@...radead.org,
        quic_pkondeti@...cinc.com, linux-kernel@...r.kernel.org,
        linux-mm@...ck.org
Subject: Re: [PATCH V3] mm: fix use-after free of page_ext after race with
 memory-offline

On Tue 09-08-22 20:16:43, Charan Teja Kalla wrote:
[...]
> diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
> index fabb2e1..0e259da 100644
> --- a/include/linux/page_ext.h
> +++ b/include/linux/page_ext.h
[...]
> @@ -87,5 +83,14 @@ static inline void page_ext_init_flatmem_late(void)
>  static inline void page_ext_init_flatmem(void)
>  {
>  }
> +
> +static inline struct page *page_ext_get(struct page *page)

struct page_ext *

> +{
> +	return NULL;
> +}
> +
> +static inline void page_ext_put(void)
> +{
> +}
>  #endif /* CONFIG_PAGE_EXTENSION */
>  #endif /* __LINUX_PAGE_EXT_H */
[...]
> diff --git a/mm/page_ext.c b/mm/page_ext.c
> index 3dc715d..91d7bd2 100644
> --- a/mm/page_ext.c
> +++ b/mm/page_ext.c
> @@ -9,6 +9,7 @@
>  #include <linux/page_owner.h>
>  #include <linux/page_idle.h>
>  #include <linux/page_table_check.h>
> +#include <linux/rcupdate.h>
>  
>  /*
>   * struct page extension
> @@ -59,6 +60,10 @@
>   * can utilize this callback to initialize the state of it correctly.
>   */
>  
> +#ifdef CONFIG_SPARSEMEM
> +#define PAGE_EXT_INVALID       (0x1)
> +#endif
> +
>  #if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
>  static bool need_page_idle(void)
>  {
> @@ -84,6 +89,7 @@ static struct page_ext_operations *page_ext_ops[] __initdata = {
>  unsigned long page_ext_size = sizeof(struct page_ext);
>  
>  static unsigned long total_usage;
> +static struct page_ext *lookup_page_ext(const struct page *page);
>  
>  static bool __init invoke_need_callbacks(void)
>  {
> @@ -125,6 +131,37 @@ static inline struct page_ext *get_entry(void *base, unsigned long index)
>  	return base + page_ext_size * index;
>  }
>  
> +/*
> + * This function gives proper page_ext of a memory section
> + * during race with the offline operation on a memory block
> + * this section falls into. Not using this function to get
> + * page_ext of a page, in code paths where extra refcount
> + * is not taken on that page eg: pfn walking, can lead to
> + * use-after-free access of page_ext.

I do not think this is really useful comment, it goes into way too much
detail about memory hotplug yet not enough to actually understand the
interaction because there are no references to the actual
synchronization scheme. I would go with something like:

/*
 * Get a page_ext associated with the given page. Returns NULL if
 * no such page_ext exists otherwise ensures that the page_ext will
 * stay alive until page_ext_put is called.
 * This implies a non-sleeping context.
 */
> + */
> +struct page_ext *page_ext_get(struct page *page)
> +{
> +	struct page_ext *page_ext;
> +
> +	rcu_read_lock();
> +	page_ext = lookup_page_ext(page);
> +	if (!page_ext) {
> +		rcu_read_unlock();
> +		return NULL;
> +	}
> +
> +	return page_ext;
> +}
> +
> +/*
> + * Must be called after work is done with the page_ext received
> + * with page_ext_get().
> + */
> +
> +void page_ext_put(void)
> +{
> +	rcu_read_unlock();
> +}

Thinking about this some more I am not sure this is a good interface. It
doesn't have any reference to the actual object this is called for. This
is nicely visible in __folio_copy_owner which just calles page_ext_put()
twice because there are 2 page_exts and I can already see how somebody
might get confused this is just an error and send a patch to drop one of
them.

I do understand why you went this way because having a parameter which
is not used will likely lead to the same situation. On the other hand it
could be annotated to not raise warnings. One potential way to
workaround that would be

void page_ext_put(struct page_ext *page_ext)
{
	if (unlikely(!page_ext))
		return;
	
	rcu_read_unlock();
}

which would help to make the api slightly more robust in case somebody
does page_ext_put in a branch where page_ext_get returns NULL.

No strong opinion on that though. WDYI?

>  #ifndef CONFIG_SPARSEMEM
>  
>  
[...]
> @@ -183,19 +184,26 @@ static inline void __set_page_owner_handle(struct page_ext *page_ext,
>  noinline void __set_page_owner(struct page *page, unsigned short order,
>  					gfp_t gfp_mask)
>  {
> -	struct page_ext *page_ext = lookup_page_ext(page);
> +	struct page_ext *page_ext = page_ext_get(page);
>  	depot_stack_handle_t handle;
>  
>  	if (unlikely(!page_ext))
>  		return;

Either add a comment like this
	/* save_stack can sleep in general so we have to page_ext_put */
> +	page_ext_put();
>  
>  	handle = save_stack(gfp_mask);

or just drop the initial page_ext_get altogether. This function is
called only when page_ext is supposed to be initialized and !page_ext
case above should be very unlikely. Or is there any reason to keep this?

> +
> +	/* Ensure page_ext is valid after page_ext_put() above */
> +	page_ext = page_ext_get(page);
> +	if (unlikely(!page_ext))
> +		return;
>  	__set_page_owner_handle(page_ext, handle, order, gfp_mask);
> +	page_ext_put();
>  }
>  
[...]
> @@ -558,13 +590,17 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
>  		 */
>  		handle = READ_ONCE(page_owner->handle);
>  		if (!handle)
> -			continue;
> +			goto loop;
>  
>  		/* Record the next PFN to read in the file offset */
>  		*ppos = (pfn - min_low_pfn) + 1;
>  
> +		memcpy(&page_owner_tmp, page_owner, sizeof(struct page_owner));
> +		page_ext_put();

why not
		page_owner_tmp = *page_owner;

>  		return print_page_owner(buf, count, pfn, page,
> -				page_owner, handle);
> +				&page_owner_tmp, handle);
> +loop:
> +		page_ext_put();
>  	}
>  
>  	return 0;

Otherwise looks good to me.

Thanks!
-- 
Michal Hocko
SUSE Labs

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ