Message-ID: <45942676-a87f-c6ef-1514-cabfc4a5fa8e@nvidia.com>
Date:   Wed, 16 Sep 2020 17:34:17 -0700
From:   Ralph Campbell <rcampbell@...dia.com>
To:     Christoph Hellwig <hch@....de>
CC:     <linux-mm@...ck.org>, <kvm-ppc@...r.kernel.org>,
        <nouveau@...ts.freedesktop.org>, <linux-kernel@...r.kernel.org>,
        Dan Williams <dan.j.williams@...el.com>,
        Ira Weiny <ira.weiny@...el.com>,
        Matthew Wilcox <willy@...radead.org>,
        Jerome Glisse <jglisse@...hat.com>,
        John Hubbard <jhubbard@...dia.com>,
        Alistair Popple <apopple@...dia.com>,
        Jason Gunthorpe <jgg@...dia.com>,
        Bharata B Rao <bharata@...ux.ibm.com>,
        Zi Yan <ziy@...dia.com>,
        "Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>,
        Yang Shi <yang.shi@...ux.alibaba.com>,
        Paul Mackerras <paulus@...abs.org>,
        Ben Skeggs <bskeggs@...hat.com>,
        Andrew Morton <akpm@...ux-foundation.org>
Subject: Re: [PATCH] mm: remove extra ZONE_DEVICE struct page refcount


On 9/15/20 11:09 PM, Christoph Hellwig wrote:
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 517751310dd2..5a82037a4b26 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -1093,34 +1093,6 @@ static inline bool is_zone_device_page(const struct page *page)
>>   #ifdef CONFIG_DEV_PAGEMAP_OPS
>>   void free_devmap_managed_page(struct page *page);
>>   DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
> 
> The export for devmap_managed_key can be dropped now.  In fact I think
> we can remove devmap_managed_key entirely now - it is only checked in
> the actual page free path instead of for each refcount manipulation,
> so a good old unlikely is probably enough.
> 
> Also free_devmap_managed_page can move to mm/internal.h.

Good suggestion.

>> +#ifdef CONFIG_DEV_PAGEMAP_OPS
>> +static void __put_devmap_managed_page(struct page *page)
>> +{
>> +	if (!static_branch_unlikely(&devmap_managed_key))
>> +		return;
>> +
>> +	switch (page->pgmap->type) {
>> +	case MEMORY_DEVICE_PRIVATE:
>> +	case MEMORY_DEVICE_FS_DAX:
>> +		free_devmap_managed_page(page);
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +}
>> +#else
>> +static inline void __put_devmap_managed_page(struct page *page)
>> +{
>> +}
>> +#endif
> 
> I think this should be moved to mm/memremap.c or even better
> actually be folded into free_devmap_managed_page, which would need
> a new name like free_zone_device_page().
> 
> Something like this incremental patch:
> 
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 7bb9e93cf86cde..29350dc27cd0cd 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1090,11 +1090,6 @@ static inline bool is_zone_device_page(const struct page *page)
>   }
>   #endif
>   
> -#ifdef CONFIG_DEV_PAGEMAP_OPS
> -void free_devmap_managed_page(struct page *page);
> -DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
> -#endif /* CONFIG_DEV_PAGEMAP_OPS */
> -
>   static inline bool is_device_private_page(const struct page *page)
>   {
>   	return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
> diff --git a/mm/internal.h b/mm/internal.h
> index 6345b08ce86ccf..629959a6f26d7c 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -618,4 +618,12 @@ struct migration_target_control {
>   	gfp_t gfp_mask;
>   };
>   
> +#ifdef CONFIG_DEV_PAGEMAP_OPS
> +void free_zone_device_page(struct page *page);
> +#else
> +static inline void free_zone_device_page(struct page *page)
> +{
> +}
> +#endif
> +
>   #endif	/* __MM_INTERNAL_H */
> diff --git a/mm/memremap.c b/mm/memremap.c
> index d549e3733f4098..b15ad2264a4f1c 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -12,6 +12,7 @@
>   #include <linux/types.h>
>   #include <linux/wait_bit.h>
>   #include <linux/xarray.h>
> +#include "internal.h"
>   
>   static DEFINE_XARRAY(pgmap_array);
>   
> @@ -37,36 +38,6 @@ unsigned long memremap_compat_align(void)
>   EXPORT_SYMBOL_GPL(memremap_compat_align);
>   #endif
>   
> -#ifdef CONFIG_DEV_PAGEMAP_OPS
> -DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
> -EXPORT_SYMBOL(devmap_managed_key);
> -
> -static void devmap_managed_enable_put(void)
> -{
> -	static_branch_dec(&devmap_managed_key);
> -}
> -
> -static int devmap_managed_enable_get(struct dev_pagemap *pgmap)
> -{
> -	if (pgmap->type == MEMORY_DEVICE_PRIVATE &&
> -	    (!pgmap->ops || !pgmap->ops->page_free)) {
> -		WARN(1, "Missing page_free method\n");
> -		return -EINVAL;
> -	}
> -
> -	static_branch_inc(&devmap_managed_key);
> -	return 0;
> -}
> -#else
> -static int devmap_managed_enable_get(struct dev_pagemap *pgmap)
> -{
> -	return -EINVAL;
> -}
> -static void devmap_managed_enable_put(void)
> -{
> -}
> -#endif /* CONFIG_DEV_PAGEMAP_OPS */
> -
>   static void pgmap_array_delete(struct range *range)
>   {
>   	xa_store_range(&pgmap_array, PHYS_PFN(range->start), PHYS_PFN(range->end),
> @@ -181,7 +152,6 @@ void memunmap_pages(struct dev_pagemap *pgmap)
>   		pageunmap_range(pgmap, i);
>   
>   	WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
> -	devmap_managed_enable_put();
>   }
>   EXPORT_SYMBOL_GPL(memunmap_pages);
>   
> @@ -319,7 +289,6 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
>   		.pgprot = PAGE_KERNEL,
>   	};
>   	const int nr_range = pgmap->nr_range;
> -	bool need_devmap_managed = true;
>   	int error, i;
>   
>   	if (WARN_ONCE(!nr_range, "nr_range must be specified\n"))
> @@ -331,8 +300,9 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
>   			WARN(1, "Device private memory not supported\n");
>   			return ERR_PTR(-EINVAL);
>   		}
> -		if (!pgmap->ops || !pgmap->ops->migrate_to_ram) {
> -			WARN(1, "Missing migrate_to_ram method\n");
> +		if (!pgmap->ops ||
> +		    !pgmap->ops->migrate_to_ram || !pgmap->ops->page_free) {
> +			WARN(1, "Missing ops\n");
>   			return ERR_PTR(-EINVAL);
>   		}
>   		if (!pgmap->owner) {
> @@ -348,11 +318,9 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
>   		}
>   		break;
>   	case MEMORY_DEVICE_GENERIC:
> -		need_devmap_managed = false;
>   		break;
>   	case MEMORY_DEVICE_PCI_P2PDMA:
>   		params.pgprot = pgprot_noncached(params.pgprot);
> -		need_devmap_managed = false;
>   		break;
>   	default:
>   		WARN(1, "Invalid pgmap type %d\n", pgmap->type);
> @@ -376,12 +344,6 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
>   		}
>   	}
>   
> -	if (need_devmap_managed) {
> -		error = devmap_managed_enable_get(pgmap);
> -		if (error)
> -			return ERR_PTR(error);
> -	}
> -
>   	/*
>   	 * Clear the pgmap nr_range as it will be incremented for each
>   	 * successfully processed range. This communicates how many
> @@ -496,16 +458,9 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
>   EXPORT_SYMBOL_GPL(get_dev_pagemap);
>   
>   #ifdef CONFIG_DEV_PAGEMAP_OPS
> -void free_devmap_managed_page(struct page *page)
> +static void free_device_private_page(struct page *page)
>   {
> -	/* notify page idle for dax */
> -	if (!is_device_private_page(page)) {
> -		wake_up_var(&page->_refcount);
> -		return;
> -	}
> -
>   	__ClearPageWaiters(page);
> -
>   	mem_cgroup_uncharge(page);
>   
>   	/*
> @@ -540,4 +495,19 @@ void free_devmap_managed_page(struct page *page)
>   	page->mapping = NULL;
>   	page->pgmap->ops->page_free(page);
>   }
> +
> +void free_zone_device_page(struct page *page)
> +{
> +	switch (page->pgmap->type) {
> +	case MEMORY_DEVICE_FS_DAX:
> +		/* notify page idle */
> +		wake_up_var(&page->_refcount);
> +		return;
> +	case MEMORY_DEVICE_PRIVATE:
> +		free_device_private_page(page);
> +		return;
> +	default:
> +		return;
> +	}
> +}
>   #endif /* CONFIG_DEV_PAGEMAP_OPS */
> diff --git a/mm/swap.c b/mm/swap.c
> index bcab5db351184a..83451ac70d0f05 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -113,36 +113,14 @@ static void __put_compound_page(struct page *page)
>   	destroy_compound_page(page);
>   }
>   
> -#ifdef CONFIG_DEV_PAGEMAP_OPS
> -static void __put_devmap_managed_page(struct page *page)
> -{
> -	if (!static_branch_unlikely(&devmap_managed_key))
> -		return;
> -
> -	switch (page->pgmap->type) {
> -	case MEMORY_DEVICE_PRIVATE:
> -	case MEMORY_DEVICE_FS_DAX:
> -		free_devmap_managed_page(page);
> -		break;
> -	default:
> -		break;
> -	}
> -}
> -#else
> -static inline void __put_devmap_managed_page(struct page *page)
> -{
> -}
> -#endif
> -
>   void __put_page(struct page *page)
>   {
>   	if (is_zone_device_page(page)) {
> -		__put_devmap_managed_page(page);
> -
>   		/*
>   		 * The page belongs to the device that created pgmap. Do
>   		 * not return it to page allocator.
>   		 */
> +		free_zone_device_page(page);
>   		return;
>   	}
>   
> @@ -923,7 +901,7 @@ void release_pages(struct page **pages, int nr)
>   						       flags);
>   				locked_pgdat = NULL;
>   			}
> -			__put_devmap_managed_page(page);
> +			free_zone_device_page(page);
>   			return;
>   		}
>   
> 

Thanks for the review!
I will apply the above in v2.
I found a couple more reference count checks in fs/dax.c, so I need to
run fstests with dax before sending v2 out.
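
The sort of check in question looks roughly like this (a simplified
sketch with a made-up helper name, not the actual fs/dax.c code): a DAX
page currently counts as busy while its refcount is above the one extra
reference every ZONE_DEVICE page carries, so dropping that extra
reference means the busy threshold drops by one as well.

	#include <linux/page_ref.h>

	/*
	 * Sketch only; "dax_page_busy" is an illustrative name.
	 * Today: busy while page_ref_count() > 1 (extra devmap reference).
	 * With the extra reference removed: busy while page_ref_count() > 0.
	 */
	static inline bool dax_page_busy(struct page *page)
	{
		return page_ref_count(page) > 1;
	}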
