lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Mon, 23 Sep 2019 11:14:06 +0530
From:   Ritesh Harjani <riteshh@...ux.ibm.com>
To:     jack@...e.cz, tytso@....edu, linux-ext4@...r.kernel.org,
        adilger@...ger.ca
Cc:     joseph.qi@...ux.alibaba.com, david@...morbit.com,
        hch@...radead.org, riteshh@...ux.ibm.com,
        mbobrowski@...browski.org, rgoldwyn@...e.de,
        aneesh.kumar@...ux.ibm.com, linux-kernel@...r.kernel.org
Subject: [RFC-v2 1/2] ext4: Add ext4_ilock & ext4_iunlock API

This adds the ext4_ilock/ext4_iunlock family of APIs.
These are preparatory APIs that make shared
locking/unlocking, and restarting with exclusive
locking/unlocking, easier in the next patch.

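Along with the above, this also addresses the AIM7
regression, which so far was fixed only for XFS in
commit 942491c9e6d6 ("xfs: fix AIM7 regression").
It does so by using the trylock variants only when
IOCB_NOWAIT is set, and taking the lock directly
otherwise.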

Signed-off-by: Ritesh Harjani <riteshh@...ux.ibm.com>
---
 fs/ext4/ext4.h    | 33 +++++++++++++++++++++++++
 fs/ext4/extents.c | 16 ++++++------
 fs/ext4/file.c    | 63 +++++++++++++++++++++++++----------------------
 fs/ext4/inode.c   |  4 +--
 fs/ext4/ioctl.c   | 16 ++++++------
 fs/ext4/super.c   | 12 ++++-----
 fs/ext4/xattr.c   | 16 ++++++------
 7 files changed, 98 insertions(+), 62 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2ab91815f52d..9ffafbe6bc3f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2945,6 +2945,39 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
 	return changed;
 }
 
+#define EXT4_IOLOCK_EXCL	(1 << 0)
+#define EXT4_IOLOCK_SHARED	(1 << 1)
+
+static inline void ext4_ilock(struct inode *inode, unsigned int iolock)
+{
+	if (iolock == EXT4_IOLOCK_EXCL)
+		inode_lock(inode);
+	else
+		inode_lock_shared(inode);
+}
+
+static inline void ext4_iunlock(struct inode *inode, unsigned int iolock)
+{
+	if (iolock == EXT4_IOLOCK_EXCL)
+		inode_unlock(inode);
+	else
+		inode_unlock_shared(inode);
+}
+
+static inline int ext4_ilock_nowait(struct inode *inode, unsigned int iolock)
+{
+	if (iolock == EXT4_IOLOCK_EXCL)
+		return inode_trylock(inode);
+	else
+		return inode_trylock_shared(inode);
+}
+
+static inline void ext4_ilock_demote(struct inode *inode, unsigned int iolock)
+{
+	BUG_ON(iolock != EXT4_IOLOCK_EXCL);
+	downgrade_write(&inode->i_rwsem);
+}
+
 int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
 				      loff_t len);
 
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a869e206bd81..ef37f4d4ee7e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4680,7 +4680,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	else
 		max_blocks -= lblk;
 
-	inode_lock(inode);
+	ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 
 	/*
 	 * Indirect files do not support unwritten extnets
@@ -4790,7 +4790,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
 	ext4_journal_stop(handle);
 out_mutex:
-	inode_unlock(inode);
+	ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 	return ret;
 }
 
@@ -4856,7 +4856,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (mode & FALLOC_FL_KEEP_SIZE)
 		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
-	inode_lock(inode);
+	ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 
 	/*
 	 * We only support preallocation for extent-based files only
@@ -4887,7 +4887,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 						EXT4_I(inode)->i_sync_tid);
 	}
 out:
-	inode_unlock(inode);
+	ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 	trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
 	return ret;
 }
@@ -5387,7 +5387,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 			return ret;
 	}
 
-	inode_lock(inode);
+	ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 	/*
 	 * There is no need to overlap collapse range with EOF, in which case
 	 * it is effectively a truncate operation
@@ -5486,7 +5486,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 out_mmap:
 	up_write(&EXT4_I(inode)->i_mmap_sem);
 out_mutex:
-	inode_unlock(inode);
+	ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 	return ret;
 }
 
@@ -5537,7 +5537,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 			return ret;
 	}
 
-	inode_lock(inode);
+	ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 	/* Currently just for extent based files */
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 		ret = -EOPNOTSUPP;
@@ -5664,7 +5664,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 out_mmap:
 	up_write(&EXT4_I(inode)->i_mmap_sem);
 out_mutex:
-	inode_unlock(inode);
+	ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 	return ret;
 }
 
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d2ff383a8b9f..ce1cecbae932 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -57,14 +57,15 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	/*
 	 * Get exclusion from truncate and other inode operations.
 	 */
-	if (!inode_trylock_shared(inode)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!ext4_ilock_nowait(inode, EXT4_IOLOCK_SHARED))
 			return -EAGAIN;
-		inode_lock_shared(inode);
+	} else {
+		ext4_ilock(inode, EXT4_IOLOCK_SHARED);
 	}
 
 	if (!ext4_dio_checks(inode)) {
-		inode_unlock_shared(inode);
+		ext4_iunlock(inode, EXT4_IOLOCK_SHARED);
 		/*
 		 * Fallback to buffered IO if the operation being
 		 * performed on the inode is not supported by direct
@@ -77,7 +78,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	}
 
 	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL);
-	inode_unlock_shared(inode);
+	ext4_iunlock(inode, EXT4_IOLOCK_SHARED);
 
 	file_accessed(iocb->ki_filp);
 	return ret;
@@ -89,22 +90,23 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
-	if (!inode_trylock_shared(inode)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!ext4_ilock_nowait(inode, EXT4_IOLOCK_SHARED))
 			return -EAGAIN;
-		inode_lock_shared(inode);
+	} else {
+		ext4_ilock(inode, EXT4_IOLOCK_SHARED);
 	}
 	/*
 	 * Recheck under inode lock - at this point we are sure it cannot
 	 * change anymore
 	 */
 	if (!IS_DAX(inode)) {
-		inode_unlock_shared(inode);
+		ext4_iunlock(inode, EXT4_IOLOCK_SHARED);
 		/* Fallback to buffered IO in case we cannot support DAX */
 		return generic_file_read_iter(iocb, to);
 	}
 	ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops);
-	inode_unlock_shared(inode);
+	ext4_iunlock(inode, EXT4_IOLOCK_SHARED);
 
 	file_accessed(iocb->ki_filp);
 	return ret;
@@ -241,7 +243,7 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
 	if (iocb->ki_flags & IOCB_NOWAIT)
 		return -EOPNOTSUPP;
 
-	inode_lock(inode);
+	ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out;
@@ -250,7 +252,7 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
 	ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
 	current->backing_dev_info = NULL;
 out:
-	inode_unlock(inode);
+	ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 	if (likely(ret > 0)) {
 		iocb->ki_pos += ret;
 		ret = generic_write_sync(iocb, ret);
@@ -374,15 +376,17 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	loff_t offset = iocb->ki_pos;
 	struct inode *inode = file_inode(iocb->ki_filp);
 	bool extend = false, overwrite = false, unaligned_aio = false;
+	unsigned int iolock = EXT4_IOLOCK_EXCL;
 
-	if (!inode_trylock(inode)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!ext4_ilock_nowait(inode, iolock))
 			return -EAGAIN;
-		inode_lock(inode);
+	} else {
+		ext4_ilock(inode, iolock);
 	}
 
 	if (!ext4_dio_checks(inode)) {
-		inode_unlock(inode);
+		ext4_iunlock(inode, iolock);
 		/*
 		 * Fallback to buffered IO if the operation on the
 		 * inode is not supported by direct IO.
@@ -392,7 +396,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0) {
-		inode_unlock(inode);
+		ext4_iunlock(inode, iolock);
 		return ret;
 	}
 
@@ -416,7 +420,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) &&
 	    ext4_should_dioread_nolock(inode)) {
 		overwrite = true;
-		downgrade_write(&inode->i_rwsem);
+		ext4_ilock_demote(inode, iolock);
+		iolock = EXT4_IOLOCK_SHARED;
 	}
 
 	if (offset + count > i_size_read(inode) ||
@@ -438,10 +443,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (ret == -EIOCBQUEUED && (unaligned_aio || extend))
 		inode_dio_wait(inode);
 
-	if (overwrite)
-		inode_unlock_shared(inode);
-	else
-		inode_unlock(inode);
+	ext4_iunlock(inode, iolock);
 
 	if (ret >= 0 && iov_iter_count(from))
 		return ext4_buffered_write_iter(iocb, from);
@@ -457,10 +459,11 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	loff_t offset;
 	struct inode *inode = file_inode(iocb->ki_filp);
 
-	if (!inode_trylock(inode)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!ext4_ilock_nowait(inode, EXT4_IOLOCK_EXCL))
 			return -EAGAIN;
-		inode_lock(inode);
+	} else {
+		ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 	}
 
 	ret = ext4_write_checks(iocb, from);
@@ -480,7 +483,7 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (error)
 		ret = error;
 out:
-	inode_unlock(inode);
+	ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 	if (ret > 0)
 		ret = generic_write_sync(iocb, ret);
 	return ret;
@@ -707,14 +710,14 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 		return generic_file_llseek_size(file, offset, whence,
 						maxbytes, i_size_read(inode));
 	case SEEK_HOLE:
-		inode_lock_shared(inode);
+		ext4_ilock(inode, EXT4_IOLOCK_SHARED);
 		offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
-		inode_unlock_shared(inode);
+		ext4_iunlock(inode, EXT4_IOLOCK_SHARED);
 		break;
 	case SEEK_DATA:
-		inode_lock_shared(inode);
+		ext4_ilock(inode, EXT4_IOLOCK_SHARED);
 		offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
-		inode_unlock_shared(inode);
+		ext4_iunlock(inode, EXT4_IOLOCK_SHARED);
 		break;
 	}
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a4f0749527c7..2870699ee504 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3914,7 +3914,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 			return ret;
 	}
 
-	inode_lock(inode);
+	ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 
 	/* No need to punch hole beyond i_size */
 	if (offset >= inode->i_size)
@@ -4021,7 +4021,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 out_dio:
 	up_write(&EXT4_I(inode)->i_mmap_sem);
 out_mutex:
-	inode_unlock(inode);
+	ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 	return ret;
 }
 
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 442f7ef873fc..c6ae48567207 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -787,13 +787,13 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (err)
 			return err;
 
-		inode_lock(inode);
+		ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 		err = ext4_ioctl_check_immutable(inode,
 				from_kprojid(&init_user_ns, ei->i_projid),
 				flags);
 		if (!err)
 			err = ext4_ioctl_setflags(inode, flags);
-		inode_unlock(inode);
+		ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 		mnt_drop_write_file(filp);
 		return err;
 	}
@@ -824,7 +824,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			goto setversion_out;
 		}
 
-		inode_lock(inode);
+		ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
 		if (IS_ERR(handle)) {
 			err = PTR_ERR(handle);
@@ -839,7 +839,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		ext4_journal_stop(handle);
 
 unlock_out:
-		inode_unlock(inode);
+		ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 setversion_out:
 		mnt_drop_write_file(filp);
 		return err;
@@ -958,9 +958,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		 * ext4_ext_swap_inode_data before we switch the
 		 * inode format to prevent read.
 		 */
-		inode_lock((inode));
+		ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 		err = ext4_ext_migrate(inode);
-		inode_unlock((inode));
+		ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 		mnt_drop_write_file(filp);
 		return err;
 	}
@@ -1150,7 +1150,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (err)
 			return err;
 
-		inode_lock(inode);
+		ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 		ext4_fill_fsxattr(inode, &old_fa);
 		err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
 		if (err)
@@ -1165,7 +1165,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			goto out;
 		err = ext4_ioctl_setproject(filp, fa.fsx_projid);
 out:
-		inode_unlock(inode);
+		ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 		mnt_drop_write_file(filp);
 		return err;
 	}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4079605d437a..45519036de83 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2681,12 +2681,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 					__func__, inode->i_ino, inode->i_size);
 			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
 				  inode->i_ino, inode->i_size);
-			inode_lock(inode);
+			ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 			truncate_inode_pages(inode->i_mapping, inode->i_size);
 			ret = ext4_truncate(inode);
 			if (ret)
 				ext4_std_error(inode->i_sb, ret);
-			inode_unlock(inode);
+			ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 			nr_truncates++;
 		} else {
 			if (test_opt(sb, DEBUG))
@@ -5763,7 +5763,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 		 * files. If this fails, we return success anyway since quotas
 		 * are already enabled and this is not a hard failure.
 		 */
-		inode_lock(inode);
+		ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 		handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
 		if (IS_ERR(handle))
 			goto unlock_inode;
@@ -5773,7 +5773,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 		ext4_mark_inode_dirty(handle, inode);
 		ext4_journal_stop(handle);
 	unlock_inode:
-		inode_unlock(inode);
+		ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 	}
 	return err;
 }
@@ -5865,7 +5865,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
 	if (err || ext4_has_feature_quota(sb))
 		goto out_put;
 
-	inode_lock(inode);
+	ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 	/*
 	 * Update modification times of quota files when userspace can
 	 * start looking at them. If we fail, we return success anyway since
@@ -5880,7 +5880,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
 	ext4_mark_inode_dirty(handle, inode);
 	ext4_journal_stop(handle);
 out_unlock:
-	inode_unlock(inode);
+	ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 out_put:
 	lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
 	iput(inode);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 491f9ee4040e..dbe3e2900c24 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -422,9 +422,9 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
 		ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE);
 		ext4_xattr_inode_set_ref(inode, 1);
 	} else {
-		inode_lock(inode);
+		ext4_ilock(inode, EXT4_IOLOCK_EXCL);
 		inode->i_flags |= S_NOQUOTA;
-		inode_unlock(inode);
+		ext4_iunlock(inode, EXT4_IOLOCK_EXCL);
 	}
 
 	*ea_inode = inode;
@@ -1025,7 +1025,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
 	u32 hash;
 	int ret;
 
-	inode_lock(ea_inode);
+	ext4_ilock(ea_inode, EXT4_IOLOCK_EXCL);
 
 	ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
 	if (ret)
@@ -1079,7 +1079,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
 		ext4_warning_inode(ea_inode,
 				   "ext4_mark_iloc_dirty() failed ret=%d", ret);
 out:
-	inode_unlock(ea_inode);
+	ext4_iunlock(ea_inode, EXT4_IOLOCK_EXCL);
 	return ret;
 }
 
@@ -1400,10 +1400,10 @@ static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
 		block += 1;
 	}
 
-	inode_lock(ea_inode);
+	ext4_ilock(ea_inode, EXT4_IOLOCK_EXCL);
 	i_size_write(ea_inode, wsize);
 	ext4_update_i_disksize(ea_inode, wsize);
-	inode_unlock(ea_inode);
+	ext4_iunlock(ea_inode, EXT4_IOLOCK_EXCL);
 
 	ext4_mark_inode_dirty(handle, ea_inode);
 
@@ -1452,9 +1452,9 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
 		 */
 		dquot_free_inode(ea_inode);
 		dquot_drop(ea_inode);
-		inode_lock(ea_inode);
+		ext4_ilock(ea_inode, EXT4_IOLOCK_EXCL);
 		ea_inode->i_flags |= S_NOQUOTA;
-		inode_unlock(ea_inode);
+		ext4_iunlock(ea_inode, EXT4_IOLOCK_EXCL);
 	}
 
 	return ea_inode;
-- 
2.21.0

Powered by blists - more mailing lists