Message-ID: <20230419041926.GA99028@hu-pkondeti-hyd.qualcomm.com>
Date:   Wed, 19 Apr 2023 09:49:26 +0530
From:   Pavan Kondeti <quic_pkondeti@...cinc.com>
To:     Frank van der Linden <fvdl@...gle.com>
CC:     <quic_charante@...cinc.com>, <akpm@...ux-foundation.org>,
        <hughd@...gle.com>, <willy@...radead.org>,
        <markhemm@...glemail.com>, <rientjes@...gle.com>,
        <surenb@...gle.com>, <shakeelb@...gle.com>,
        <quic_pkondeti@...cinc.com>, <linux-mm@...ck.org>,
        <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH V7 0/2] mm: shmem: support POSIX_FADV_[WILL|DONT]NEED for
 shmem files

On Tue, Apr 18, 2023 at 05:29:42PM +0000, Frank van der Linden wrote:
> Below is a quick patch to allow FADVISE_DONTNEED for shmem to reclaim
> mapped pages too. This would fit our usecase, and matches MADV_PAGEOUT
> more closely.
> 
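For context, the userspace side this enables is plain posix_fadvise() on a
shmem-backed fd. A minimal sketch of the usecase (my own illustration, not
from the series; assuming a memfd, though any tmpfs-backed fd should behave
the same):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 4UL << 20;	/* 4 MiB of shmem */
	int fd = memfd_create("cache", 0);
	char *p;

	if (fd < 0 || ftruncate(fd, len))
		return 1;

	/* Touch the pages so they are resident and mapped. */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	memset(p, 0xaa, len);

	/*
	 * With the series plus the diff below, this can reclaim even
	 * the mapped pages, provided this process owns the only mapping.
	 */
	if (posix_fadvise(fd, 0, len, POSIX_FADV_DONTNEED))
		perror("posix_fadvise");

	munmap(p, len);
	close(fd);
	return 0;
}
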
> The patch series as posted skips mapped pages even if you remove
> the folio_mapped() check, because page_referenced() in
> shrink_page_list() will find page tables with the page mapped,
> and ignore_references is not set when called from reclaim_pages().
> 
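(The check being referred to, roughly: folio_referenced() walks the rmap
and counts PTEs still mapping the folio, and for a swap-backed folio such
as shmem any referencing PTE leads to activation instead of reclaim. A
simplified sketch from my reading of mm/vmscan.c, not the verbatim code:)

static enum folio_references folio_check_references(struct folio *folio,
						    struct scan_control *sc)
{
	unsigned long vm_flags;
	int referenced_ptes;

	referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup,
					   &vm_flags);
	/* Still mapped and referenced: activate it, don't reclaim. */
	if (referenced_ptes)
		return PAGEREF_ACTIVATE;

	return PAGEREF_RECLAIM;
}
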
> You can make this work in a similar fashion to MADV_PAGEOUT by
> first unmapping a page, but only if the mapping belongs to
> the caller. You just have to change the check for "is there
> only one mapping and am I the owner". To do that, change a few
> lines in try_to_unmap to allow for checking the mm the mapping
> belongs to, and pass in current->mm (NULL still unmaps all mappings).
> 
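(In other words, the unmap-everything entry point keeps its semantics, and
a caller that only wants its own PTEs gone does, e.g.:

	/* Unmap only current's mapping; other mappers are untouched. */
	try_to_unmap_mm(current->mm, folio, TTU_BATCH_FLUSH);

with try_to_unmap_mm(NULL, folio, flags) behaving like the existing
try_to_unmap().)
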
> I lightly tested this in a somewhat older codebase, so the diff
> below isn't fully tested. But if there are no objections to
> this approach, we could add it on top of your patchset after
> better testing.
> 
> - Frank
> 
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index b87d01660412..4403cc2ccc4c 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -368,6 +368,8 @@ int folio_referenced(struct folio *, int is_locked,
>  
>  void try_to_migrate(struct folio *folio, enum ttu_flags flags);
>  void try_to_unmap(struct folio *, enum ttu_flags flags);
> +void try_to_unmap_mm(struct mm_struct *mm, struct folio *folio,
> +			enum ttu_flags flags);
>  
>  int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
>  				unsigned long end, struct page **pages,
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 8632e02661ac..4d30e8f5afe2 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1443,6 +1443,11 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
>  	munlock_vma_folio(folio, vma, compound);
>  }
>  
> +struct unmap_arg {
> +	enum ttu_flags flags;
> +	struct mm_struct *mm;
> +};
> +
>  /*
>   * @arg: enum ttu_flags will be passed to this argument
>   */
> @@ -1455,7 +1460,11 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>  	struct page *subpage;
>  	bool anon_exclusive, ret = true;
>  	struct mmu_notifier_range range;
> -	enum ttu_flags flags = (enum ttu_flags)(long)arg;
> +	struct unmap_arg *uap = (struct unmap_arg *)arg;
> +	enum ttu_flags flags = uap->flags;
> +
> +	if (uap->mm && uap->mm != mm)
> +		return true;
>  
>  	/*
>  	 * When racing against e.g. zap_pte_range() on another cpu,
> @@ -1776,6 +1785,7 @@ static int folio_not_mapped(struct folio *folio)
>  
>  /**
>   * try_to_unmap - Try to remove all page table mappings to a folio.
> + * @mm: mm to unmap from (NULL to unmap from all)
>   * @folio: The folio to unmap.
>   * @flags: action and flags
>   *
> @@ -1785,11 +1795,16 @@ static int folio_not_mapped(struct folio *folio)
>   *
>   * Context: Caller must hold the folio lock.
>   */
> -void try_to_unmap(struct folio *folio, enum ttu_flags flags)
> +void try_to_unmap_mm(struct mm_struct *mm, struct folio *folio,
> +		enum ttu_flags flags)
>  {
> +	struct unmap_arg ua = {
> +		.flags = flags,
> +		.mm = mm,
> +	};
>  	struct rmap_walk_control rwc = {
>  		.rmap_one = try_to_unmap_one,
> -		.arg = (void *)flags,
> +		.arg = (void *)&ua,
>  		.done = folio_not_mapped,
>  		.anon_lock = folio_lock_anon_vma_read,
>  	};
> @@ -1800,6 +1815,11 @@ void try_to_unmap(struct folio *folio, enum ttu_flags flags)
>  		rmap_walk(folio, &rwc);
>  }
>  
> +void try_to_unmap(struct folio *folio, enum ttu_flags flags)
> +{
> +	try_to_unmap_mm(NULL, folio, flags);
> +}
> +
>  /*
>   * @arg: enum ttu_flags will be passed to this argument.
>   *
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 1af85259b6fc..b24af2fb3378 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -2362,8 +2362,24 @@ static void shmem_isolate_pages_range(struct address_space *mapping, loff_t star
>  
>  		if (!folio_try_get(folio))
>  			continue;
> -		if (folio_test_unevictable(folio) || folio_mapped(folio) ||
> -				folio_isolate_lru(folio)) {
> +
> +		if (folio_test_unevictable(folio)) {
> +			folio_put(folio);
> +			continue;
> +		}
> +
> +		/*
> +		 * If the folio is mapped once, try to unmap it from the
> +		 * caller's page table. If it's still mapped afterwards,
> +		 * it belongs to someone else, and we're not going to
> +		 * change someone else's mapping.
> +		 */
> +		if (folio_mapcount(folio) == 1 && folio_trylock(folio)) {
> +			try_to_unmap_mm(current->mm, folio, TTU_BATCH_FLUSH);
> +			folio_unlock(folio);
> +		}

Can the rmap walk be done from an RCU read-side critical section, which
does not allow explicit blocking?
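
If I read the series right, shmem_isolate_pages_range() walks the mapping
with xas_for_each() under rcu_read_lock(), while the rmap walk behind
try_to_unmap_mm() can sleep (e.g. on i_mmap_rwsem). Assuming that
structure, the usual way out is to pause the xarray walk and drop RCU
around the blocking call, along these lines:

	rcu_read_lock();
	xas_for_each(&xas, folio, end) {
		if (!folio_try_get(folio))
			continue;

		if (folio_mapcount(folio) == 1 && folio_trylock(folio)) {
			/*
			 * The rmap walk may block, so step out of the
			 * RCU read section and rejoin the walk after.
			 */
			xas_pause(&xas);
			rcu_read_unlock();
			try_to_unmap_mm(current->mm, folio, TTU_BATCH_FLUSH);
			folio_unlock(folio);
			rcu_read_lock();
		}
		folio_put(folio);
	}
	rcu_read_unlock();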

Thanks,
Pavan
