lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <61ff7fe6-9a79-4dd3-8076-7106fe08be5c@nvidia.com>
Date: Thu, 21 Nov 2024 18:57:16 -0800
From: John Hubbard <jhubbard@...dia.com>
To: Alistair Popple <apopple@...dia.com>, <dan.j.williams@...el.com>,
	<linux-mm@...ck.org>
CC: <lina@...hilina.net>, <zhang.lyra@...il.com>,
	<gerald.schaefer@...ux.ibm.com>, <vishal.l.verma@...el.com>,
	<dave.jiang@...el.com>, <logang@...tatee.com>, <bhelgaas@...gle.com>,
	<jack@...e.cz>, <jgg@...pe.ca>, <catalin.marinas@....com>, <will@...nel.org>,
	<mpe@...erman.id.au>, <npiggin@...il.com>, <dave.hansen@...ux.intel.com>,
	<ira.weiny@...el.com>, <willy@...radead.org>, <djwong@...nel.org>,
	<tytso@....edu>, <linmiaohe@...wei.com>, <david@...hat.com>,
	<peterx@...hat.com>, <linux-doc@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>, <linux-arm-kernel@...ts.infradead.org>,
	<linuxppc-dev@...ts.ozlabs.org>, <nvdimm@...ts.linux.dev>,
	<linux-cxl@...r.kernel.org>, <linux-fsdevel@...r.kernel.org>,
	<linux-ext4@...r.kernel.org>, <linux-xfs@...r.kernel.org>, <hch@....de>,
	<david@...morbit.com>
Subject: Re: [PATCH v3 05/25] fs/dax: Create a common implementation to break
 DAX layouts

On 11/21/24 5:40 PM, Alistair Popple wrote:
> Prior to freeing a block file systems supporting FS DAX must check
> that the associated pages are both unmapped from user-space and not
> undergoing DMA or other access from eg. get_user_pages(). This is
> achieved by unmapping the file range and scanning the FS DAX
> page-cache to see if any pages within the mapping have an elevated
> refcount.
> 
> This is done using two functions - dax_layout_busy_page_range() which
> returns a page to wait for the refcount to become idle on. Rather than
> open-code this introduce a common implementation to both unmap and
> wait for the page to become idle.
> 
> Signed-off-by: Alistair Popple <apopple@...dia.com>
> ---
>   fs/dax.c            | 29 +++++++++++++++++++++++++++++
>   fs/ext4/inode.c     | 10 +---------
>   fs/fuse/dax.c       | 29 +++++------------------------
>   fs/xfs/xfs_inode.c  | 23 +++++------------------
>   fs/xfs/xfs_inode.h  |  2 +-
>   include/linux/dax.h |  7 +++++++
>   6 files changed, 48 insertions(+), 52 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index efc1d56..b1ad813 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -845,6 +845,35 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
>   	return ret;
>   }
>   
> +static int wait_page_idle(struct page *page,
> +			void (cb)(struct inode *),
> +			struct inode *inode)
> +{
> +	return ___wait_var_event(page, page_ref_count(page) == 1,
> +				TASK_INTERRUPTIBLE, 0, 0, cb(inode));
> +}
> +
> +/*
> + * Unmaps the inode and waits for any DMA to complete prior to deleting the
> + * DAX mapping entries for the range.
> + */
> +int dax_break_mapping(struct inode *inode, loff_t start, loff_t end,
> +		void (cb)(struct inode *))
> +{
> +	struct page *page;
> +	int error;
> +
> +	do {
> +		page = dax_layout_busy_page_range(inode->i_mapping, start, end);
> +		if (!page)
> +			break;
> +
> +		error = wait_page_idle(page, cb, inode);
> +	} while (error == 0);
> +
> +	return error;
> +}

Hi Alistair!

This needs to be EXPORT'ed. In fact so do two others, but I thought I'd
reply at the exact point that the first fix needs to be inserted, which
is here. And let you sprinkle the remaining two into the right patches.

The overall diff required for me to build the kernel with this series is:

diff --git a/fs/dax.c b/fs/dax.c
index 0169b356b975..35e3c41eb517 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -921,6 +921,7 @@ void dax_delete_mapping_range(struct address_space *mapping,
  	}
  	xas_unlock_irq(&xas);
  }
+EXPORT_SYMBOL_GPL(dax_delete_mapping_range);
  
  static int wait_page_idle(struct page *page,
  			void (cb)(struct inode *),
@@ -961,6 +962,7 @@ int dax_break_mapping(struct inode *inode, loff_t start, loff_t end,
  
  	return error;
  }
+EXPORT_SYMBOL_GPL(dax_break_mapping);
  
  void dax_break_mapping_uninterruptible(struct inode *inode,
  				void (cb)(struct inode *))
@@ -979,6 +981,7 @@ void dax_break_mapping_uninterruptible(struct inode *inode,
  	if (!page)
  		dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX);
  }
+EXPORT_SYMBOL_GPL(dax_break_mapping_uninterruptible);
  
  /*
   * Invalidate DAX entry if it is clean.


thanks,
John Hubbard


> +
>   /*
>    * Invalidate DAX entry if it is clean.
>    */
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index cf87c5b..d42c011 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3885,15 +3885,7 @@ int ext4_break_layouts(struct inode *inode)
>   	if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)))
>   		return -EINVAL;
>   
> -	do {
> -		page = dax_layout_busy_page(inode->i_mapping);
> -		if (!page)
> -			return 0;
> -
> -		error = dax_wait_page_idle(page, ext4_wait_dax_page, inode);
> -	} while (error == 0);
> -
> -	return error;
> +	return dax_break_mapping_inode(inode, ext4_wait_dax_page);
>   }
>   
>   /*
> diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
> index af436b5..2493f9c 100644
> --- a/fs/fuse/dax.c
> +++ b/fs/fuse/dax.c
> @@ -665,38 +665,19 @@ static void fuse_wait_dax_page(struct inode *inode)
>   	filemap_invalidate_lock(inode->i_mapping);
>   }
>   
> -/* Should be called with mapping->invalidate_lock held exclusively */
> -static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
> -				    loff_t start, loff_t end)
> -{
> -	struct page *page;
> -
> -	page = dax_layout_busy_page_range(inode->i_mapping, start, end);
> -	if (!page)
> -		return 0;
> -
> -	*retry = true;
> -	return dax_wait_page_idle(page, fuse_wait_dax_page, inode);
> -}
> -
> -/* dmap_end == 0 leads to unmapping of whole file */
> +/* Should be called with mapping->invalidate_lock held exclusively.
> + * dmap_end == 0 leads to unmapping of whole file.
> + */
>   int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
>   				  u64 dmap_end)
>   {
> -	bool	retry;
> -	int	ret;
> -
> -	do {
> -		retry = false;
> -		ret = __fuse_dax_break_layouts(inode, &retry, dmap_start,
> -					       dmap_end);
> -	} while (ret == 0 && retry);
>   	if (!dmap_end) {
>   		dmap_start = 0;
>   		dmap_end = LLONG_MAX;
>   	}
>   
> -	return ret;
> +	return dax_break_mapping(inode, dmap_start, dmap_end,
> +				fuse_wait_dax_page);
>   }
>   
>   ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index eb12123..120597a 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -2704,21 +2704,17 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
>   	struct xfs_inode	*ip2)
>   {
>   	int			error;
> -	bool			retry;
>   	struct page		*page;
>   
>   	if (ip1->i_ino > ip2->i_ino)
>   		swap(ip1, ip2);
>   
>   again:
> -	retry = false;
>   	/* Lock the first inode */
>   	xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
> -	error = xfs_break_dax_layouts(VFS_I(ip1), &retry);
> -	if (error || retry) {
> +	error = xfs_break_dax_layouts(VFS_I(ip1));
> +	if (error) {
>   		xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
> -		if (error == 0 && retry)
> -			goto again;
>   		return error;
>   	}
>   
> @@ -2977,19 +2973,11 @@ xfs_wait_dax_page(
>   
>   int
>   xfs_break_dax_layouts(
> -	struct inode		*inode,
> -	bool			*retry)
> +	struct inode		*inode)
>   {
> -	struct page		*page;
> -
>   	xfs_assert_ilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL);
>   
> -	page = dax_layout_busy_page(inode->i_mapping);
> -	if (!page)
> -		return 0;
> -
> -	*retry = true;
> -	return dax_wait_page_idle(page, xfs_wait_dax_page, inode);
> +	return dax_break_mapping_inode(inode, xfs_wait_dax_page);
>   }
>   
>   int
> @@ -3007,8 +2995,7 @@ xfs_break_layouts(
>   		retry = false;
>   		switch (reason) {
>   		case BREAK_UNMAP:
> -			error = xfs_break_dax_layouts(inode, &retry);
> -			if (error || retry)
> +			if (xfs_break_dax_layouts(inode))
>   				break;
>   			fallthrough;
>   		case BREAK_WRITE:
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 97ed912..0db27ba 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -564,7 +564,7 @@ xfs_itruncate_extents(
>   	return xfs_itruncate_extents_flags(tpp, ip, whichfork, new_size, 0);
>   }
>   
> -int	xfs_break_dax_layouts(struct inode *inode, bool *retry);
> +int	xfs_break_dax_layouts(struct inode *inode);
>   int	xfs_break_layouts(struct inode *inode, uint *iolock,
>   		enum layout_break_reason reason);
>   
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index 773dfc4..7419c88 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -257,6 +257,13 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
>   int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
>   int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
>   				      pgoff_t index);
> +int __must_check dax_break_mapping(struct inode *inode, loff_t start,
> +				loff_t end, void (cb)(struct inode *));
> +static inline int __must_check dax_break_mapping_inode(struct inode *inode,
> +						void (cb)(struct inode *))
> +{
> +	return dax_break_mapping(inode, 0, LLONG_MAX, cb);
> +}
>   int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
>   				  struct inode *dest, loff_t destoff,
>   				  loff_t len, bool *is_same,



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ