lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <YyzGJl1AuSD4BwNz@iweiny-desk3>
Date:   Thu, 22 Sep 2022 13:31:34 -0700
From:   Ira Weiny <ira.weiny@...el.com>
To:     "Fabio M. De Francesco" <fmdefrancesco@...il.com>
CC:     Christoph Hellwig <hch@...radead.org>,
        <linux-kernel@...r.kernel.org>,
        "Venkataramanan, Anirudh" <anirudh.venkataramanan@...el.com>
Subject: Re: [PATCH] fs/sysv: Replace kmap() with kmap_local_page()

On Fri, Aug 26, 2022 at 07:50:41PM +0200, Fabio M. De Francesco wrote:
> kmap() is being deprecated in favor of kmap_local_page().
> 
> There are two main problems with kmap(): (1) It comes with an overhead as
> the mapping space is restricted and protected by a global lock for
> synchronization and (2) it also requires global TLB invalidation when the
> kmap’s pool wraps and it might block when the mapping space is fully
> utilized until a slot becomes available.
> 
> With kmap_local_page() the mappings are per thread, CPU local, can take
> page faults, and can be called from any context (including interrupts).
> It is faster than kmap() in kernels with HIGHMEM enabled. Furthermore,
> the tasks can be preempted and, when they are scheduled to run again, the
> kernel virtual addresses are restored and still valid.
> 
> Since its use in fs/sysv is safe everywhere, it should be preferred.
> 
> Therefore, replace kmap() with kmap_local_page() in fs/sysv. kunmap_local()
> requires the mapping address, so return that address from dir_get_page()
> to be used in dir_put_page().
> 
> Cc: "Venkataramanan, Anirudh" <anirudh.venkataramanan@...el.com>
> Suggested-by: Ira Weiny <ira.weiny@...el.com>

Reviewed-by: Ira Weiny <ira.weiny@...el.com>

> Signed-off-by: Fabio M. De Francesco <fmdefrancesco@...il.com>
> ---
> 
> This code is not tested. I have no means to create an SysV filesystem.
> Despite nothing here seems to break the strict rules about the use of
> kmap_local_page(), any help with testing will be much appreciated :-)
> 
>  fs/sysv/dir.c   | 83 ++++++++++++++++++++++---------------------------
>  fs/sysv/namei.c | 26 +++++++++-------
>  fs/sysv/sysv.h  | 19 ++++++++---
>  3 files changed, 65 insertions(+), 63 deletions(-)
> 
> diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
> index 88e38cd8f5c9..130350fde106 100644
> --- a/fs/sysv/dir.c
> +++ b/fs/sysv/dir.c
> @@ -28,12 +28,6 @@ const struct file_operations sysv_dir_operations = {
>  	.fsync		= generic_file_fsync,
>  };
>  
> -static inline void dir_put_page(struct page *page)
> -{
> -	kunmap(page);
> -	put_page(page);
> -}
> -
>  static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
>  {
>  	struct address_space *mapping = page->mapping;
> @@ -52,12 +46,12 @@ static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
>  	return err;
>  }
>  
> -static struct page * dir_get_page(struct inode *dir, unsigned long n)
> +static struct page *dir_get_page(struct inode *dir, unsigned long n, void **page_addr)
>  {
>  	struct address_space *mapping = dir->i_mapping;
>  	struct page *page = read_mapping_page(mapping, n, NULL);
>  	if (!IS_ERR(page))
> -		kmap(page);
> +		*page_addr = kmap_local_page(page);
>  	return page;
>  }
>  
> @@ -80,11 +74,10 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx)
>  	for ( ; n < npages; n++, offset = 0) {
>  		char *kaddr, *limit;
>  		struct sysv_dir_entry *de;
> -		struct page *page = dir_get_page(inode, n);
> +		struct page *page = dir_get_page(inode, n, (void **)&kaddr);
>  
>  		if (IS_ERR(page))
>  			continue;
> -		kaddr = (char *)page_address(page);
>  		de = (struct sysv_dir_entry *)(kaddr+offset);
>  		limit = kaddr + PAGE_SIZE - SYSV_DIRSIZE;
>  		for ( ;(char*)de <= limit; de++, ctx->pos += sizeof(*de)) {
> @@ -96,11 +89,11 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx)
>  			if (!dir_emit(ctx, name, strnlen(name,SYSV_NAMELEN),
>  					fs16_to_cpu(SYSV_SB(sb), de->inode),
>  					DT_UNKNOWN)) {
> -				dir_put_page(page);
> +				dir_put_page(page, kaddr);
>  				return 0;
>  			}
>  		}
> -		dir_put_page(page);
> +		dir_put_page(page, kaddr);
>  	}
>  	return 0;
>  }
> @@ -124,7 +117,8 @@ static inline int namecompare(int len, int maxlen,
>   * itself (as a parameter - res_dir). It does NOT read the inode of the
>   * entry - you'll have to do that yourself if you want to.
>   */
> -struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_page)
> +struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry,
> +				       struct page **res_page, void **res_page_addr)
>  {
>  	const char * name = dentry->d_name.name;
>  	int namelen = dentry->d_name.len;
> @@ -133,8 +127,10 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
>  	unsigned long npages = dir_pages(dir);
>  	struct page *page = NULL;
>  	struct sysv_dir_entry *de;
> +	char *kaddr;
>  
>  	*res_page = NULL;
> +	*res_page_addr = NULL;
>  
>  	start = SYSV_I(dir)->i_dir_start_lookup;
>  	if (start >= npages)
> @@ -142,10 +138,8 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
>  	n = start;
>  
>  	do {
> -		char *kaddr;
> -		page = dir_get_page(dir, n);
> +		page = dir_get_page(dir, n, (void **)&kaddr);
>  		if (!IS_ERR(page)) {
> -			kaddr = (char*)page_address(page);
>  			de = (struct sysv_dir_entry *) kaddr;
>  			kaddr += PAGE_SIZE - SYSV_DIRSIZE;
>  			for ( ; (char *) de <= kaddr ; de++) {
> @@ -155,7 +149,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
>  							name, de->name))
>  					goto found;
>  			}
> -			dir_put_page(page);
> +			dir_put_page(page, kaddr);
>  		}
>  
>  		if (++n >= npages)
> @@ -167,6 +161,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
>  found:
>  	SYSV_I(dir)->i_dir_start_lookup = n;
>  	*res_page = page;
> +	*res_page_addr = kaddr;
>  	return de;
>  }
>  
> @@ -176,6 +171,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
>  	const char * name = dentry->d_name.name;
>  	int namelen = dentry->d_name.len;
>  	struct page *page = NULL;
> +	void *page_addr = NULL;
>  	struct sysv_dir_entry * de;
>  	unsigned long npages = dir_pages(dir);
>  	unsigned long n;
> @@ -185,11 +181,11 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
>  
>  	/* We take care of directory expansion in the same loop */
>  	for (n = 0; n <= npages; n++) {
> -		page = dir_get_page(dir, n);
> +		page = dir_get_page(dir, n, &page_addr);
>  		err = PTR_ERR(page);
>  		if (IS_ERR(page))
>  			goto out;
> -		kaddr = (char*)page_address(page);
> +		kaddr = page_addr;
>  		de = (struct sysv_dir_entry *)kaddr;
>  		kaddr += PAGE_SIZE - SYSV_DIRSIZE;
>  		while ((char *)de <= kaddr) {
> @@ -200,14 +196,13 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
>  				goto out_page;
>  			de++;
>  		}
> -		dir_put_page(page);
> +		dir_put_page(page, page_addr);
>  	}
>  	BUG();
>  	return -EINVAL;
>  
>  got_it:
> -	pos = page_offset(page) +
> -			(char*)de - (char*)page_address(page);
> +	pos = page_offset(page) + (char *)de - (char *)page_addr;
>  	lock_page(page);
>  	err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
>  	if (err)
> @@ -219,7 +214,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
>  	dir->i_mtime = dir->i_ctime = current_time(dir);
>  	mark_inode_dirty(dir);
>  out_page:
> -	dir_put_page(page);
> +	dir_put_page(page, page_addr);
>  out:
>  	return err;
>  out_unlock:
> @@ -227,10 +222,9 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
>  	goto out_page;
>  }
>  
> -int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
> +int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page, char *kaddr)
>  {
>  	struct inode *inode = page->mapping->host;
> -	char *kaddr = (char*)page_address(page);
>  	loff_t pos = page_offset(page) + (char *)de - kaddr;
>  	int err;
>  
> @@ -239,7 +233,7 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
>  	BUG_ON(err);
>  	de->inode = 0;
>  	err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
> -	dir_put_page(page);
> +	dir_put_page(page, kaddr);
>  	inode->i_ctime = inode->i_mtime = current_time(inode);
>  	mark_inode_dirty(inode);
>  	return err;
> @@ -259,19 +253,15 @@ int sysv_make_empty(struct inode *inode, struct inode *dir)
>  		unlock_page(page);
>  		goto fail;
>  	}
> -	kmap(page);
> -
> -	base = (char*)page_address(page);
> +	base = kmap_local_page(page);
>  	memset(base, 0, PAGE_SIZE);
> -
>  	de = (struct sysv_dir_entry *) base;
>  	de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
>  	strcpy(de->name,".");
>  	de++;
>  	de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), dir->i_ino);
>  	strcpy(de->name,"..");
> -
> -	kunmap(page);
> +	kunmap_local(base);
>  	err = dir_commit_chunk(page, 0, 2 * SYSV_DIRSIZE);
>  fail:
>  	put_page(page);
> @@ -286,16 +276,15 @@ int sysv_empty_dir(struct inode * inode)
>  	struct super_block *sb = inode->i_sb;
>  	struct page *page = NULL;
>  	unsigned long i, npages = dir_pages(inode);
> +	char *kaddr;
>  
>  	for (i = 0; i < npages; i++) {
> -		char *kaddr;
>  		struct sysv_dir_entry * de;
> -		page = dir_get_page(inode, i);
> +		page = dir_get_page(inode, i, (void **)&kaddr);
>  
>  		if (IS_ERR(page))
>  			continue;
>  
> -		kaddr = (char *)page_address(page);
>  		de = (struct sysv_dir_entry *)kaddr;
>  		kaddr += PAGE_SIZE-SYSV_DIRSIZE;
>  
> @@ -314,22 +303,21 @@ int sysv_empty_dir(struct inode * inode)
>  			if (de->name[1] != '.' || de->name[2])
>  				goto not_empty;
>  		}
> -		dir_put_page(page);
> +		dir_put_page(page, kaddr);
>  	}
>  	return 1;
>  
>  not_empty:
> -	dir_put_page(page);
> +	dir_put_page(page, kaddr);
>  	return 0;
>  }
>  
>  /* Releases the page */
>  void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
> -	struct inode *inode)
> +		   void *page_addr, struct inode *inode)
>  {
>  	struct inode *dir = page->mapping->host;
> -	loff_t pos = page_offset(page) +
> -			(char *)de-(char*)page_address(page);
> +	loff_t pos = page_offset(page) + (char *)de - (char *)page_addr;
>  	int err;
>  
>  	lock_page(page);
> @@ -337,19 +325,21 @@ void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
>  	BUG_ON(err);
>  	de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
>  	err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
> -	dir_put_page(page);
> +	dir_put_page(page, page_addr);
>  	dir->i_mtime = dir->i_ctime = current_time(dir);
>  	mark_inode_dirty(dir);
>  }
>  
> -struct sysv_dir_entry * sysv_dotdot (struct inode *dir, struct page **p)
> +struct sysv_dir_entry *sysv_dotdot(struct inode *dir, struct page **p, void **pa)
>  {
> -	struct page *page = dir_get_page(dir, 0);
> +	void *page_addr;
> +	struct page *page = dir_get_page(dir, 0, &page_addr);
>  	struct sysv_dir_entry *de = NULL;
>  
>  	if (!IS_ERR(page)) {
> -		de = (struct sysv_dir_entry*) page_address(page) + 1;
> +		de = (struct sysv_dir_entry *)page_addr + 1;
>  		*p = page;
> +		*pa = page_addr;
>  	}
>  	return de;
>  }
> @@ -357,12 +347,13 @@ struct sysv_dir_entry * sysv_dotdot (struct inode *dir, struct page **p)
>  ino_t sysv_inode_by_name(struct dentry *dentry)
>  {
>  	struct page *page;
> -	struct sysv_dir_entry *de = sysv_find_entry (dentry, &page);
> +	void *page_addr;
> +	struct sysv_dir_entry *de = sysv_find_entry(dentry, &page, &page_addr);
>  	ino_t res = 0;
>  	
>  	if (de) {
>  		res = fs16_to_cpu(SYSV_SB(dentry->d_sb), de->inode);
> -		dir_put_page(page);
> +		dir_put_page(page, page_addr);
>  	}
>  	return res;
>  }
> diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
> index b2e6abc06a2d..1371980ec5fb 100644
> --- a/fs/sysv/namei.c
> +++ b/fs/sysv/namei.c
> @@ -152,14 +152,15 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry)
>  {
>  	struct inode * inode = d_inode(dentry);
>  	struct page * page;
> +	void *page_addr;
>  	struct sysv_dir_entry * de;
>  	int err = -ENOENT;
>  
> -	de = sysv_find_entry(dentry, &page);
> +	de = sysv_find_entry(dentry, &page, &page_addr);
>  	if (!de)
>  		goto out;
>  
> -	err = sysv_delete_entry (de, page);
> +	err = sysv_delete_entry(de, page, page_addr);
>  	if (err)
>  		goto out;
>  
> @@ -196,26 +197,29 @@ static int sysv_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
>  	struct inode * old_inode = d_inode(old_dentry);
>  	struct inode * new_inode = d_inode(new_dentry);
>  	struct page * dir_page = NULL;
> +	void *dir_page_addr;
>  	struct sysv_dir_entry * dir_de = NULL;
>  	struct page * old_page;
> +	void *old_page_addr;
>  	struct sysv_dir_entry * old_de;
>  	int err = -ENOENT;
>  
>  	if (flags & ~RENAME_NOREPLACE)
>  		return -EINVAL;
>  
> -	old_de = sysv_find_entry(old_dentry, &old_page);
> +	old_de = sysv_find_entry(old_dentry, &old_page, &old_page_addr);
>  	if (!old_de)
>  		goto out;
>  
>  	if (S_ISDIR(old_inode->i_mode)) {
>  		err = -EIO;
> -		dir_de = sysv_dotdot(old_inode, &dir_page);
> +		dir_de = sysv_dotdot(old_inode, &dir_page, &dir_page_addr);
>  		if (!dir_de)
>  			goto out_old;
>  	}
>  
>  	if (new_inode) {
> +		void *new_page_addr;
>  		struct page * new_page;
>  		struct sysv_dir_entry * new_de;
>  
> @@ -224,10 +228,10 @@ static int sysv_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
>  			goto out_dir;
>  
>  		err = -ENOENT;
> -		new_de = sysv_find_entry(new_dentry, &new_page);
> +		new_de = sysv_find_entry(new_dentry, &new_page, &new_page_addr);
>  		if (!new_de)
>  			goto out_dir;
> -		sysv_set_link(new_de, new_page, old_inode);
> +		sysv_set_link(new_de, new_page, new_page_addr, old_inode);
>  		new_inode->i_ctime = current_time(new_inode);
>  		if (dir_de)
>  			drop_nlink(new_inode);
> @@ -240,23 +244,21 @@ static int sysv_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
>  			inode_inc_link_count(new_dir);
>  	}
>  
> -	sysv_delete_entry(old_de, old_page);
> +	sysv_delete_entry(old_de, old_page, old_page_addr);
>  	mark_inode_dirty(old_inode);
>  
>  	if (dir_de) {
> -		sysv_set_link(dir_de, dir_page, new_dir);
> +		sysv_set_link(dir_de, dir_page, dir_page_addr, new_dir);
>  		inode_dec_link_count(old_dir);
>  	}
>  	return 0;
>  
>  out_dir:
>  	if (dir_de) {
> -		kunmap(dir_page);
> -		put_page(dir_page);
> +		dir_put_page(dir_page, dir_page_addr);
>  	}
>  out_old:
> -	kunmap(old_page);
> -	put_page(old_page);
> +	dir_put_page(old_page, old_page_addr);
>  out:
>  	return err;
>  }
> diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
> index 99ddf033da4f..b0631ea6b506 100644
> --- a/fs/sysv/sysv.h
> +++ b/fs/sysv/sysv.h
> @@ -119,6 +119,11 @@ static inline void dirty_sb(struct super_block *sb)
>  		mark_buffer_dirty(sbi->s_bh2);
>  }
>  
> +static inline void dir_put_page(struct page *page, void *page_addr)
> +{
> +	kunmap_local(page_addr);
> +	put_page(page);
> +}
>  
>  /* ialloc.c */
>  extern struct sysv_inode *sysv_raw_inode(struct super_block *, unsigned,
> @@ -148,14 +153,18 @@ extern void sysv_destroy_icache(void);
>  
>  
>  /* dir.c */
> -extern struct sysv_dir_entry *sysv_find_entry(struct dentry *, struct page **);
> +extern struct sysv_dir_entry *sysv_find_entry(struct dentry *dir,
> +					      struct page **res_page,
> +					      void **res_page_addr);
>  extern int sysv_add_link(struct dentry *, struct inode *);
> -extern int sysv_delete_entry(struct sysv_dir_entry *, struct page *);
> +extern int sysv_delete_entry(struct sysv_dir_entry *dir, struct page *page,
> +			     char *kaddr);
>  extern int sysv_make_empty(struct inode *, struct inode *);
>  extern int sysv_empty_dir(struct inode *);
> -extern void sysv_set_link(struct sysv_dir_entry *, struct page *,
> -			struct inode *);
> -extern struct sysv_dir_entry *sysv_dotdot(struct inode *, struct page **);
> +extern void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
> +			  void *page_addr, struct inode *inode);
> +extern struct sysv_dir_entry *sysv_dotdot(struct inode *inode,
> +					  struct page **page, void **page_addr);
>  extern ino_t sysv_inode_by_name(struct dentry *);
>  
>  
> -- 
> 2.37.2
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ