When fixing the rename() vs rmdir() deadlock, we stopped locking default groups' inodes in configfs_detach_prep(), letting racing mkdir() in default groups proceed concurrently. This enables races like below happen, which leads to a failing mkdir() making rmdir() fail, despite the group to remove having no user-created directory under it in the end. process A: process B: /* PWD=A/B */ mkdir("C") make_item("C") attach_group("C") rmdir("A") detach_prep("A") detach_prep("B") error because of "C" return -ENOTEMPTY attach_group("C/D") error (eg -ENOMEM) return -ENOMEM This patch prevents such scenarii by making rmdir() wait as long as detach_prep() fails because a racing mkdir() is in the middle of attach_group(). To achieve this, mkdir() sets a flag CONFIGFS_USET_IN_MKDIR in parent's configfs_dirent before calling attach_group(), and clears the flag once attach_group() is done. detach_prep() fails with -EAGAIN whenever the flag is hit and returns the guilty inode's mutex so that rmdir() can wait on it. Signed-off-by: Louis Rilling --- fs/configfs/configfs_internal.h | 1 fs/configfs/dir.c | 53 ++++++++++++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 10 deletions(-) Index: b/fs/configfs/configfs_internal.h =================================================================== --- a/fs/configfs/configfs_internal.h 2008-06-16 18:06:27.000000000 +0200 +++ b/fs/configfs/configfs_internal.h 2008-06-16 18:06:54.000000000 +0200 @@ -48,6 +48,7 @@ struct configfs_dirent { #define CONFIGFS_USET_DIR 0x0040 #define CONFIGFS_USET_DEFAULT 0x0080 #define CONFIGFS_USET_DROPPING 0x0100 +#define CONFIGFS_USET_IN_MKDIR 0x0200 #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) extern spinlock_t configfs_dirent_lock; Index: b/fs/configfs/dir.c =================================================================== --- a/fs/configfs/dir.c 2008-06-16 18:06:50.000000000 +0200 +++ b/fs/configfs/dir.c 2008-06-16 18:52:50.000000000 +0200 @@ -364,7 +364,7 @@ static struct dentry * configfs_lookup(s * If there is an error, the caller will reset the flags via * configfs_detach_rollback(). */ -static int configfs_detach_prep(struct dentry *dentry) +static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex) { struct configfs_dirent *parent_sd = dentry->d_fsdata; struct configfs_dirent *sd; @@ -379,6 +379,12 @@ static int configfs_detach_prep(struct d if (sd->s_type & CONFIGFS_NOT_PINNED) continue; if (sd->s_type & CONFIGFS_USET_DEFAULT) { + /* Abort if racing with mkdir() */ + if (sd->s_type & CONFIGFS_USET_IN_MKDIR) { + if (wait_mutex) + *wait_mutex = &sd->s_dentry->d_inode->i_mutex; + return -EAGAIN; + } /* Mark that we're trying to drop the group */ sd->s_type |= CONFIGFS_USET_DROPPING; @@ -386,7 +392,7 @@ static int configfs_detach_prep(struct d * Yup, recursive. If there's a problem, blame * deep nesting of default_groups */ - ret = configfs_detach_prep(sd->s_dentry); + ret = configfs_detach_prep(sd->s_dentry, wait_mutex); if (!ret) continue; } else @@ -1113,11 +1119,26 @@ static int configfs_mkdir(struct inode * */ module_got = 1; + /* + * Make racing rmdir() fail if it did not tag parent with + * CONFIGFS_USET_DROPPING + * Note: if CONFIGFS_USET_DROPPING is already set, attach_group() will + * fail and let rmdir() terminate correctly + */ + spin_lock(&configfs_dirent_lock); + /* This will make configfs_detach_prep() fail */ + sd->s_type |= CONFIGFS_USET_IN_MKDIR; + spin_unlock(&configfs_dirent_lock); + if (group) ret = configfs_attach_group(parent_item, item, dentry); else ret = configfs_attach_item(parent_item, item, dentry); + spin_lock(&configfs_dirent_lock); + sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; + spin_unlock(&configfs_dirent_lock); + out_unlink: if (ret) { /* Tear down everything we built up */ @@ -1182,13 +1203,25 @@ static int configfs_rmdir(struct inode * } spin_lock(&configfs_dirent_lock); - ret = configfs_detach_prep(dentry); - if (ret) { - configfs_detach_rollback(dentry); - spin_unlock(&configfs_dirent_lock); - config_item_put(parent_item); - return ret; - } + do { + struct mutex *wait_mutex; + + ret = configfs_detach_prep(dentry, &wait_mutex); + if (ret) { + configfs_detach_rollback(dentry); + spin_unlock(&configfs_dirent_lock); + if (ret != -EAGAIN) { + config_item_put(parent_item); + return ret; + } + + /* Wait until the racing operation terminates */ + mutex_lock(wait_mutex); + mutex_unlock(wait_mutex); + + spin_lock(&configfs_dirent_lock); + } + } while (ret == -EAGAIN); spin_unlock(&configfs_dirent_lock); /* Get a working ref for the duration of this function */ @@ -1480,7 +1513,7 @@ void configfs_unregister_subsystem(struc I_MUTEX_PARENT); mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); spin_lock(&configfs_dirent_lock); - if (configfs_detach_prep(dentry)) { + if (configfs_detach_prep(dentry, NULL)) { printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); } spin_unlock(&configfs_dirent_lock); -- Dr Louis Rilling Kerlabs Skype: louis.rilling Batiment Germanium Phone: (+33|0) 6 80 89 08 23 80 avenue des Buttes de Coesmes http://www.kerlabs.com/ 35700 Rennes -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/