lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 25 Oct 2021 19:47:06 +0530
From:   Charan Teja Kalla <charante@...eaurora.org>
To:     hughd@...gle.com, akpm@...ux-foundation.org, vbabka@...e.cz,
        rientjes@...gle.com, david@...hat.com, mhocko@...e.com,
        linux-mm@...ck.org, ",surenb"@google.com
Cc:     linux-kernel@...r.kernel.org
Subject: Re: [PATCH] mm: shmem: implement POSIX_FADV_[WILL|DONT]NEED for shmem

Hello,

Can you please provide your valuable feed back here. There are of great
help to me.

Thanks,
Charan

On 10/8/2021 7:36 PM, Charan Teja Reddy wrote:
> Currently fadvise(2) is supported only for the files that doesn't
> associated with noop_backing_dev_info thus for the files, like shmem,
> fadvise results into NOP. But then there is file_operations->fadvise()
> that lets the file systems to implement their own fadvise
> implementation. Use this support to implement some of the POSIX_FADV_XXX
> functionality for shmem files.
> 
> This patch aims to implement POSIX_FADV_WILLNEED and POSIX_FADV_DONTNEED
> advices to shmem files which can be helpful for the drivers who may want
> to manage the shmem pages of the files that are created through
> shmem_file_setup[_with_mnt]().  An example usecase may be like, driver
> can create the shmem file of the size equal to its requirements and
> map the pages for DMA and then pass the fd to user. The user who knows
> well about the usage of these pages can now decide when these pages are
> not required push them to swap through DONTNEED thus free up memory well
> in advance rather than relying on the reclaim and use WILLNEED when it
> decide that they are useful in the near future. IOW, it lets the clients
> to free up/read the memory when it wants to. Another usecase is that GEM
> objets which are currenlty allocated and managed through shmem files can
> use vfs_fadvise(DONT|WILLNEED) on shmem fd when the driver comes to
> know(like through some hints from user space) that GEM objects are not
> going to use/will need in the near future.
> 
> Signed-off-by: Charan Teja Reddy <charante@...eaurora.org>
> ---
>  mm/shmem.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 139 insertions(+)
> 
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 70d9ce2..ab7ea33 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -38,6 +38,8 @@
>  #include <linux/hugetlb.h>
>  #include <linux/frontswap.h>
>  #include <linux/fs_parser.h>
> +#include <linux/mm_inline.h>
> +#include <linux/fadvise.h>
>  
>  #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
>  
> @@ -2792,6 +2794,142 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
>  	return error;
>  }
>  
> +static int shmem_fadvise_dontneed(struct address_space *mapping, loff_t start,
> +				loff_t end)
> +{
> +	int ret;
> +	struct writeback_control wbc = {
> +		.sync_mode = WB_SYNC_NONE,
> +		.nr_to_write = LONG_MAX,
> +		.range_start = 0,
> +		.range_end = LLONG_MAX,
> +		.for_reclaim = 1,
> +	};
> +	struct page *page;
> +
> +	XA_STATE(xas, &mapping->i_pages, start);
> +	if (!shmem_mapping(mapping))
> +		return -EINVAL;
> +
> +	if (!total_swap_pages)
> +		return 0;
> +
> +	lru_add_drain();
> +
> +	rcu_read_lock();
> +	xas_for_each(&xas, page, end) {
> +		if (xas_retry(&xas, page))
> +			continue;
> +		if (xa_is_value(page))
> +			continue;
> +		if (isolate_lru_page(page))
> +			continue;
> +
> +		inc_node_page_state(page, NR_ISOLATED_ANON +
> +						page_is_file_lru(page));
> +		lock_page(page);
> +		ClearPageDirty(page);
> +		SetPageReclaim(page);
> +		ret = shmem_writepage(page, &wbc);
> +		if (!PageWriteback(page))
> +			ClearPageReclaim(page);
> +		if (ret) {
> +			unlock_page(page);
> +			putback_lru_page(page);
> +			dec_node_page_state(page, NR_ISOLATED_ANON +
> +						page_is_file_lru(page));
> +			continue;
> +		}
> +
> +		/*
> +		 * shmem_writepage() place the page in the swapcache.
> +		 * Delete the page from the swapcache and release the
> +		 * page.
> +		 */
> +		lock_page(page);
> +		delete_from_swap_cache(page);
> +		unlock_page(page);
> +		dec_node_page_state(page, NR_ISOLATED_ANON +
> +						page_is_file_lru(page));
> +		put_page(page);
> +		if (need_resched()) {
> +			xas_pause(&xas);
> +			cond_resched_rcu();
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	return 0;
> +}
> +
> +static int shmem_fadvise_willneed(struct address_space *mapping,
> +				 pgoff_t start, pgoff_t long end)
> +{
> +	struct page *page;
> +
> +	XA_STATE(xas, &mapping->i_pages, start);
> +	rcu_read_lock();
> +	xas_for_each(&xas, page, end) {
> +		if (!xa_is_value(page))
> +			continue;
> +		page = shmem_read_mapping_page(mapping, xas.xa_index);
> +		if (!IS_ERR(page))
> +			put_page(page);
> +
> +		if (need_resched()) {
> +			xas_pause(&xas);
> +			cond_resched_rcu();
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	return 0;
> +}
> +
> +static int shmem_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
> +{
> +	loff_t endbyte;
> +	pgoff_t start_index;
> +	pgoff_t end_index;
> +	struct address_space *mapping;
> +	int ret = 0;
> +
> +	mapping = file->f_mapping;
> +	if (!mapping || len < 0)
> +		return -EINVAL;
> +
> +	endbyte = (u64)offset + (u64)len;
> +	if (!len || endbyte < len)
> +		endbyte = -1;
> +	else
> +		endbyte--;
> +
> +
> +	start_index = offset >> PAGE_SHIFT;
> +	end_index   = endbyte >> PAGE_SHIFT;
> +	switch (advice) {
> +	case POSIX_FADV_DONTNEED:
> +		ret = shmem_fadvise_dontneed(mapping, start_index, end_index);
> +		break;
> +	case POSIX_FADV_WILLNEED:
> +		ret = shmem_fadvise_willneed(mapping, start_index, end_index);
> +		break;
> +	case POSIX_FADV_NORMAL:
> +	case POSIX_FADV_RANDOM:
> +	case POSIX_FADV_SEQUENTIAL:
> +	case POSIX_FADV_NOREUSE:
> +		/*
> +		 * No bad return value, but ignore advice. May have to
> +		 * implement in future.
> +		 */
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return ret;
> +}
> +
>  static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
>  {
>  	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
> @@ -3799,6 +3937,7 @@ static const struct file_operations shmem_file_operations = {
>  	.splice_write	= iter_file_splice_write,
>  	.fallocate	= shmem_fallocate,
>  #endif
> +	.fadvise	= shmem_fadvise,
>  };
>  
>  static const struct inode_operations shmem_inode_operations = {
> 

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
Forum, a Linux Foundation Collaborative Project

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ