lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20251031-bildung-erdig-b18b71db0520@brauner>
Date: Fri, 31 Oct 2025 13:23:33 +0100
From: Christian Brauner <brauner@...nel.org>
To: Jeff Layton <jlayton@...nel.org>
Cc: Miklos Szeredi <miklos@...redi.hu>, 
	Alexander Viro <viro@...iv.linux.org.uk>, Jan Kara <jack@...e.cz>, Chuck Lever <chuck.lever@...cle.com>, 
	Alexander Aring <alex.aring@...il.com>, Trond Myklebust <trondmy@...nel.org>, 
	Anna Schumaker <anna@...nel.org>, Steve French <sfrench@...ba.org>, 
	Paulo Alcantara <pc@...guebit.org>, Ronnie Sahlberg <ronniesahlberg@...il.com>, 
	Shyam Prasad N <sprasad@...rosoft.com>, Tom Talpey <tom@...pey.com>, 
	Bharath SM <bharathsm@...rosoft.com>, Greg Kroah-Hartman <gregkh@...uxfoundation.org>, 
	"Rafael J. Wysocki" <rafael@...nel.org>, Danilo Krummrich <dakr@...nel.org>, 
	David Howells <dhowells@...hat.com>, Tyler Hicks <code@...icks.com>, NeilBrown <neil@...wn.name>, 
	Olga Kornievskaia <okorniev@...hat.com>, Dai Ngo <Dai.Ngo@...cle.com>, 
	Amir Goldstein <amir73il@...il.com>, Namjae Jeon <linkinjeon@...nel.org>, 
	Steve French <smfrench@...il.com>, Sergey Senozhatsky <senozhatsky@...omium.org>, 
	Carlos Maiolino <cem@...nel.org>, Kuniyuki Iwashima <kuniyu@...gle.com>, 
	"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, 
	Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Simon Horman <horms@...nel.org>, 
	linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org, linux-nfs@...r.kernel.org, 
	linux-cifs@...r.kernel.org, samba-technical@...ts.samba.org, netfs@...ts.linux.dev, 
	ecryptfs@...r.kernel.org, linux-unionfs@...r.kernel.org, linux-xfs@...r.kernel.org, 
	netdev@...r.kernel.org
Subject: Re: [PATCH v3 03/13] vfs: allow mkdir to wait for delegation break
 on parent

On Wed, Oct 29, 2025 at 09:37:22AM -0400, Jeff Layton wrote:
> On Wed, 2025-10-29 at 14:04 +0100, Christian Brauner wrote:
> > On Tue, Oct 21, 2025 at 11:25:38AM -0400, Jeff Layton wrote:
> > > In order to add directory delegation support, we need to break
> > > delegations on the parent whenever there is going to be a change in the
> > > directory.
> > > 
> > > Add a new delegated_inode parameter to vfs_mkdir. All of the existing
> > > callers set that to NULL for now, except for do_mkdirat which will
> > > properly block until the lease is gone.
> > > 
> > > Reviewed-by: Jan Kara <jack@...e.cz>
> > > Reviewed-by: NeilBrown <neil@...wn.name>
> > > Signed-off-by: Jeff Layton <jlayton@...nel.org>
> > > ---
> > >  drivers/base/devtmpfs.c  |  2 +-
> > >  fs/cachefiles/namei.c    |  2 +-
> > >  fs/ecryptfs/inode.c      |  2 +-
> > >  fs/init.c                |  2 +-
> > >  fs/namei.c               | 24 ++++++++++++++++++------
> > >  fs/nfsd/nfs4recover.c    |  2 +-
> > >  fs/nfsd/vfs.c            |  2 +-
> > >  fs/overlayfs/overlayfs.h |  2 +-
> > >  fs/smb/server/vfs.c      |  2 +-
> > >  fs/xfs/scrub/orphanage.c |  2 +-
> > >  include/linux/fs.h       |  2 +-
> > >  11 files changed, 28 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
> > > index 9d4e46ad8352257a6a65d85526ebdbf9bf2d4b19..0e79621cb0f79870003b867ca384199171ded4e0 100644
> > > --- a/drivers/base/devtmpfs.c
> > > +++ b/drivers/base/devtmpfs.c
> > > @@ -180,7 +180,7 @@ static int dev_mkdir(const char *name, umode_t mode)
> > >  	if (IS_ERR(dentry))
> > >  		return PTR_ERR(dentry);
> > >  
> > > -	dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode);
> > > +	dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode, NULL);
> > >  	if (!IS_ERR(dentry))
> > >  		/* mark as kernel-created inode */
> > >  		d_inode(dentry)->i_private = &thread;
> > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > index d1edb2ac38376c4f9d2a18026450bb3c774f7824..50c0f9c76d1fd4c05db90d7d0d1bad574523ead0 100644
> > > --- a/fs/cachefiles/namei.c
> > > +++ b/fs/cachefiles/namei.c
> > > @@ -130,7 +130,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
> > >  			goto mkdir_error;
> > >  		ret = cachefiles_inject_write_error();
> > >  		if (ret == 0)
> > > -			subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700);
> > > +			subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700, NULL);
> > >  		else
> > >  			subdir = ERR_PTR(ret);
> > >  		if (IS_ERR(subdir)) {
> > > diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
> > > index ed1394da8d6bd7065f2a074378331f13fcda17f9..35830b3144f8f71374a78b3e7463b864f4fc216e 100644
> > > --- a/fs/ecryptfs/inode.c
> > > +++ b/fs/ecryptfs/inode.c
> > > @@ -508,7 +508,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> > >  		goto out;
> > >  
> > >  	lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir,
> > > -				 lower_dentry, mode);
> > > +				 lower_dentry, mode, NULL);
> > >  	rc = PTR_ERR(lower_dentry);
> > >  	if (IS_ERR(lower_dentry))
> > >  		goto out;
> > > diff --git a/fs/init.c b/fs/init.c
> > > index 07f592ccdba868509d0f3aaf9936d8d890fdbec5..895f8a09a71acfd03e11164e3b441a7d4e2de146 100644
> > > --- a/fs/init.c
> > > +++ b/fs/init.c
> > > @@ -233,7 +233,7 @@ int __init init_mkdir(const char *pathname, umode_t mode)
> > >  	error = security_path_mkdir(&path, dentry, mode);
> > >  	if (!error) {
> > >  		dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
> > > -				  dentry, mode);
> > > +				  dentry, mode, NULL);
> > >  		if (IS_ERR(dentry))
> > >  			error = PTR_ERR(dentry);
> > >  	}
> > > diff --git a/fs/namei.c b/fs/namei.c
> > > index 6e61e0215b34134b1690f864e2719e3f82cf71a8..86cf6eca1f485361c6732974e4103cf5ea721539 100644
> > > --- a/fs/namei.c
> > > +++ b/fs/namei.c
> > > @@ -4407,10 +4407,11 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
> > >  
> > >  /**
> > >   * vfs_mkdir - create directory returning correct dentry if possible
> > > - * @idmap:	idmap of the mount the inode was found from
> > > - * @dir:	inode of the parent directory
> > > - * @dentry:	dentry of the child directory
> > > - * @mode:	mode of the child directory
> > > + * @idmap:		idmap of the mount the inode was found from
> > > + * @dir:		inode of the parent directory
> > > + * @dentry:		dentry of the child directory
> > > + * @mode:		mode of the child directory
> > > + * @delegated_inode:	returns parent inode, if the inode is delegated.
> > 
> > I wonder if it would be feasible and potentially elegant if delegated
> > inodes were returned as separate type like struct delegated_inode
> > similar to the vfsuid_t just a struct wrapper around the inode itself.
> > The advantage is that it's not possible to accidently abuse this thing
> > as we're passing that stuff around to try_break_deleg() and so on.
> > 
> 
> I have a patch that does exactly that:
> 
> https://lore.kernel.org/linux-nfs/20250924-dir-deleg-v3-15-9f3af8bc5c40@kernel.org/

I love it!

> 
> I didn't submit it here since it wasn't strictly required for this
> patchset. If we get around to implementing CB_NOTIFY support however,
> it will be since we'll need to pass back other information than just
> the inode.
> 
> I could move that into this series if you prefer. If we do that though,
> then it might also be cleaner to take the previous patch in the series
> that cleans up __break_lease() arguments:
> 
> https://lore.kernel.org/linux-nfs/20250924-dir-deleg-v3-14-9f3af8bc5c40@kernel.org/

If you have a wholesome story to tell then by all means tell it all.
Don't GRRM us with half a tale.  IOW, it's fine to have a large patch
series. If it's well split-up then it's great. Al or I apparently can't
get a series out the door that's under 20 patches so you get the same
leeway. :)

> 
> Let me know what you'd prefer.
> 
> > >   *
> > >   * Create a directory.
> > >   *
> > > @@ -4427,7 +4428,8 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
> > >   * In case of an error the dentry is dput() and an ERR_PTR() is returned.
> > >   */
> > >  struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> > > -			 struct dentry *dentry, umode_t mode)
> > > +			 struct dentry *dentry, umode_t mode,
> > > +			 struct inode **delegated_inode)
> > >  {
> > >  	int error;
> > >  	unsigned max_links = dir->i_sb->s_max_links;
> > > @@ -4450,6 +4452,10 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> > >  	if (max_links && dir->i_nlink >= max_links)
> > >  		goto err;
> > >  
> > > +	error = try_break_deleg(dir, delegated_inode);
> > > +	if (error)
> > > +		goto err;
> > > +
> > >  	de = dir->i_op->mkdir(idmap, dir, dentry, mode);
> > >  	error = PTR_ERR(de);
> > >  	if (IS_ERR(de))
> > > @@ -4473,6 +4479,7 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode)
> > >  	struct path path;
> > >  	int error;
> > >  	unsigned int lookup_flags = LOOKUP_DIRECTORY;
> > > +	struct inode *delegated_inode = NULL;
> > >  
> > >  retry:
> > >  	dentry = filename_create(dfd, name, &path, lookup_flags);
> > > @@ -4484,11 +4491,16 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode)
> > >  			mode_strip_umask(path.dentry->d_inode, mode));
> > >  	if (!error) {
> > >  		dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
> > > -				  dentry, mode);
> > > +				   dentry, mode, &delegated_inode);
> > >  		if (IS_ERR(dentry))
> > >  			error = PTR_ERR(dentry);
> > >  	}
> > >  	end_creating_path(&path, dentry);
> > > +	if (delegated_inode) {
> > > +		error = break_deleg_wait(&delegated_inode);
> > > +		if (!error)
> > > +			goto retry;
> > > +	}
> > >  	if (retry_estale(error, lookup_flags)) {
> > >  		lookup_flags |= LOOKUP_REVAL;
> > >  		goto retry;
> > > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
> > > index b1005abcb9035b2cf743200808a251b00af7e3f4..423dd102b51198ea7c447be2b9a0a5020c950dba 100644
> > > --- a/fs/nfsd/nfs4recover.c
> > > +++ b/fs/nfsd/nfs4recover.c
> > > @@ -202,7 +202,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
> > >  		 * as well be forgiving and just succeed silently.
> > >  		 */
> > >  		goto out_put;
> > > -	dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
> > > +	dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, 0700, NULL);
> > >  	if (IS_ERR(dentry))
> > >  		status = PTR_ERR(dentry);
> > >  out_put:
> > > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> > > index 8b2dc7a88aab015d1e39da0dd4e6daf7e276aabe..5f24af289d509bea54a324b8851fa06de6050353 100644
> > > --- a/fs/nfsd/vfs.c
> > > +++ b/fs/nfsd/vfs.c
> > > @@ -1645,7 +1645,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
> > >  			nfsd_check_ignore_resizing(iap);
> > >  		break;
> > >  	case S_IFDIR:
> > > -		dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
> > > +		dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode, NULL);
> > >  		if (IS_ERR(dchild)) {
> > >  			host_err = PTR_ERR(dchild);
> > >  		} else if (d_is_negative(dchild)) {
> > > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> > > index c8fd5951fc5ece1ae6b3e2a0801ca15f9faf7d72..0f65f9a5d54d4786b39e4f4f30f416d5b9016e70 100644
> > > --- a/fs/overlayfs/overlayfs.h
> > > +++ b/fs/overlayfs/overlayfs.h
> > > @@ -248,7 +248,7 @@ static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs,
> > >  {
> > >  	struct dentry *ret;
> > >  
> > > -	ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode);
> > > +	ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, NULL);
> > >  	pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret));
> > >  	return ret;
> > >  }
> > > diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
> > > index 891ed2dc2b7351a5cb14a2241d71095ffdd03f08..3d2190f26623b23ea79c63410905a3c3ad684048 100644
> > > --- a/fs/smb/server/vfs.c
> > > +++ b/fs/smb/server/vfs.c
> > > @@ -230,7 +230,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
> > >  	idmap = mnt_idmap(path.mnt);
> > >  	mode |= S_IFDIR;
> > >  	d = dentry;
> > > -	dentry = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode);
> > > +	dentry = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode, NULL);
> > >  	if (IS_ERR(dentry))
> > >  		err = PTR_ERR(dentry);
> > >  	else if (d_is_negative(dentry))
> > > diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
> > > index 9c12cb8442311ca26b169e4d1567939ae44a5be0..91c9d07b97f306f57aebb9b69ba564b0c2cb8c17 100644
> > > --- a/fs/xfs/scrub/orphanage.c
> > > +++ b/fs/xfs/scrub/orphanage.c
> > > @@ -167,7 +167,7 @@ xrep_orphanage_create(
> > >  	 */
> > >  	if (d_really_is_negative(orphanage_dentry)) {
> > >  		orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode,
> > > -					     orphanage_dentry, 0750);
> > > +					     orphanage_dentry, 0750, NULL);
> > >  		error = PTR_ERR(orphanage_dentry);
> > >  		if (IS_ERR(orphanage_dentry))
> > >  			goto out_unlock_root;
> > > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > > index c895146c1444be36e0a779df55622cc38c9419ff..1040df3792794cd353b86558b41618294e25b8a6 100644
> > > --- a/include/linux/fs.h
> > > +++ b/include/linux/fs.h
> > > @@ -2113,7 +2113,7 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap,
> > >  int vfs_create(struct mnt_idmap *, struct inode *,
> > >  	       struct dentry *, umode_t, bool);
> > >  struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *,
> > > -			 struct dentry *, umode_t);
> > > +			 struct dentry *, umode_t, struct inode **);
> > >  int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
> > >                umode_t, dev_t);
> > >  int vfs_symlink(struct mnt_idmap *, struct inode *,
> > > 
> > > -- 
> > > 2.51.0
> > > 
> 
> -- 
> Jeff Layton <jlayton@...nel.org>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ