lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue,  1 Oct 2019 00:41:00 -0700
From:   Harshad Shirwadkar <harshadshirwadkar@...il.com>
To:     linux-ext4@...r.kernel.org
Cc:     Harshad Shirwadkar <harshadshirwadkar@...il.com>
Subject: [PATCH v3 11/13] ext4: add support for asynchronous fast commits

Until this patch, fast commits could only be invoked by jbd2 thread.
This patch allows file system to perform fast commit in an async manner
without involving jbd2 thread. This makes fast commits even faster as
it gets rid of the time spent in context switching to jbd2 thread. In
order to avoid race between jbd2 thread and async fast commits, we add
new jbd2 APIs that allow file systems to indicate their intent of
performing an async fast commit.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@...il.com>
---
 fs/ext4/ext4.h        |  3 ++
 fs/ext4/ext4_jbd2.c   | 74 +++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/fsync.c       |  7 ++--
 fs/jbd2/commit.c      | 11 +++++++
 fs/jbd2/journal.c     | 59 ++++++++++++++++++++++++++++++++++
 fs/jbd2/transaction.c |  2 ++
 include/linux/jbd2.h  | 10 ++++++
 7 files changed, 164 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cd5b567d8ca8..a8a481c5ffa4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2716,6 +2716,9 @@ extern int ext4_group_extend(struct super_block *sb,
 extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
 
 /* super.c */
+int ext4_fc_async_commit(journal_t *journal, tid_t commit_tid,
+			 tid_t commit_subtid, struct inode *inode,
+			 struct dentry *dentry);
 extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
 					 sector_t block, int op_flags);
 extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 12d6e70bf676..cf796268322b 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -1144,6 +1144,80 @@ static int ext4_journal_fc_replay_cb(journal_t *journal, struct buffer_head *bh,
 	return ret;
 }
 
+int ext4_fc_async_commit(journal_t *journal, tid_t commit_tid,
+			 tid_t commit_subtid, struct inode *inode,
+			 struct dentry *dentry)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct super_block *sb = inode->i_sb;
+	struct buffer_head *bh;
+	int ret;
+
+	if (!ext4_should_fast_commit(sb))
+		return jbd2_complete_transaction(journal, commit_tid);
+
+	read_lock(&ei->i_fc.fc_lock);
+	if (ei->i_fc.fc_tid != commit_tid) {
+		read_unlock(&ei->i_fc.fc_lock);
+		return 0;
+	}
+	read_unlock(&ei->i_fc.fc_lock);
+
+	if (ext4_is_inode_fc_ineligible(inode))
+		return jbd2_complete_transaction(journal, commit_tid);
+
+	if (jbd2_commit_check(journal, commit_tid, commit_subtid))
+		return 0;
+
+	ret = jbd2_start_async_fc(journal, commit_tid);
+	if (ret)
+		return jbd2_fc_complete_commit(journal, commit_tid,
+					       commit_subtid);
+
+	trace_ext4_journal_fc_commit_cb_start(sb);
+
+	ret = jbd2_submit_inode_data(journal, ei->jinode);
+	if (ret)
+		goto out;
+
+	ret = jbd2_map_fc_buf(journal, &bh);
+	if (ret) {
+		jbd2_stop_async_fc(journal, commit_tid);
+		trace_ext4_journal_fc_commit_cb_stop(sb, 0, "map_fc_buf");
+		return jbd2_complete_transaction(journal, commit_tid);
+
+	}
+
+	ret = ext4_fc_write_inode(journal, bh, inode, commit_tid,
+				  commit_subtid, 1, dentry);
+
+	if (ret < 0) {
+		brelse(bh);
+		jbd2_stop_async_fc(journal, commit_tid);
+		trace_ext4_journal_fc_commit_cb_stop(sb, 0, "fc_write_inode");
+		return jbd2_complete_transaction(journal, commit_tid);
+	}
+	lock_buffer(bh);
+	clear_buffer_dirty(bh);
+	set_buffer_uptodate(bh);
+	bh->b_end_io = ext4_end_buffer_io_sync;
+	submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+
+	jbd2_stop_async_fc(journal, commit_tid);
+	wait_on_buffer(bh);
+	if (unlikely(!buffer_uptodate(bh))) {
+		trace_ext4_journal_fc_commit_cb_stop(sb, 0, "IO");
+		return -EIO;
+	}
+
+out:
+	trace_ext4_journal_fc_commit_cb_stop(sb,
+					     ret < 0 ? 0 : ret,
+					     ret >= 0 ? "success" : "fail");
+	wake_up(&journal->j_wait_async_fc);
+	return ret;
+}
+
 void ext4_init_fast_commit(struct super_block *sb, journal_t *journal)
 {
 	if (ext4_should_fast_commit(sb)) {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 5508baa11bb6..5bbfc55e1756 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -98,7 +98,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
 	int ret = 0, err;
-	tid_t commit_tid;
+	tid_t commit_tid, commit_subtid;
 	bool needs_barrier = false;
 
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
@@ -148,10 +148,13 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	}
 
 	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
+	commit_subtid = datasync ? ei->i_datasync_subtid : ei->i_sync_subtid;
+
 	if (journal->j_flags & JBD2_BARRIER &&
 	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
 		needs_barrier = true;
-	ret = jbd2_complete_transaction(journal, commit_tid);
+	ret = ext4_fc_async_commit(journal, commit_tid, commit_subtid,
+				   inode, file->f_path.dentry);
 	if (needs_barrier) {
 	issue_flush:
 		err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index e85f51e1cc70..18cb70fa2421 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -452,6 +452,17 @@ void jbd2_journal_commit_transaction(journal_t *journal, bool *fc)
 
 	write_lock(&journal->j_state_lock);
 	full_commit = journal->j_do_full_commit;
+	journal->j_running_transaction->t_async_fc_allowed = false;
+	while (journal->j_running_transaction->t_async_fc_ongoing) {
+		DEFINE_WAIT(wait);
+
+		prepare_to_wait(&journal->j_wait_async_fc, &wait,
+				TASK_UNINTERRUPTIBLE);
+		write_unlock(&journal->j_state_lock);
+		schedule();
+		write_lock(&journal->j_state_lock);
+		finish_wait(&journal->j_wait_async_fc, &wait);
+	}
 	write_unlock(&journal->j_state_lock);
 
 	/* Let file-system try its own fast commit */
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e0684212384d..81daa2cff67f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -794,6 +794,64 @@ int jbd2_commit_check(journal_t *journal, tid_t tid, tid_t subtid)
 	return 0;
 }
 
+int jbd2_start_async_fc(journal_t *journal, tid_t tid)
+{
+	transaction_t *txn;
+	int ret = -EINVAL;
+
+	if (!journal->j_running_transaction)
+		return ret;
+
+	if (journal->j_running_transaction->t_tid != tid)
+		return ret;
+
+	txn = journal->j_running_transaction;
+	write_lock(&journal->j_state_lock);
+	while (txn->t_state == T_RUNNING) {
+		DEFINE_WAIT(wait);
+
+		if (txn->t_async_fc_allowed) {
+			if (!txn->t_async_fc_ongoing) {
+				txn->t_async_fc_ongoing = true;
+				ret = 0;
+				break;
+			}
+			prepare_to_wait(&journal->j_wait_async_fc,
+					&wait, TASK_UNINTERRUPTIBLE);
+			write_unlock(&journal->j_state_lock);
+			schedule();
+			write_lock(&journal->j_state_lock);
+			finish_wait(&journal->j_wait_async_fc, &wait);
+		} else {
+			ret = -ECANCELED;
+			break;
+		}
+	}
+	write_unlock(&journal->j_state_lock);
+
+	return ret;
+}
+
+int jbd2_stop_async_fc(journal_t *journal, tid_t tid)
+{
+	transaction_t *txn;
+
+	if (!journal->j_running_transaction)
+		return -EINVAL;
+
+	if (journal->j_running_transaction->t_tid != tid)
+		return -EINVAL;
+
+	txn = journal->j_running_transaction;
+	write_lock(&journal->j_state_lock);
+	J_ASSERT(txn->t_state == T_RUNNING);
+	txn->t_async_fc_ongoing = false;
+	txn->t_subtid++;
+	write_unlock(&journal->j_state_lock);
+	return 0;
+
+}
+
 /* Return 1 when transaction with given tid has already committed. */
 int jbd2_transaction_committed(journal_t *journal, tid_t tid)
 {
@@ -1308,6 +1366,7 @@ static journal_t *journal_init_common(struct block_device *bdev,
 	init_waitqueue_head(&journal->j_wait_commit);
 	init_waitqueue_head(&journal->j_wait_updates);
 	init_waitqueue_head(&journal->j_wait_reserved);
+	init_waitqueue_head(&journal->j_wait_async_fc);
 	mutex_init(&journal->j_barrier);
 	mutex_init(&journal->j_checkpoint_mutex);
 	spin_lock_init(&journal->j_revoke_lock);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index ce7f03cfd90b..f17f813b5610 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -103,6 +103,8 @@ static void jbd2_get_transaction(journal_t *journal,
 	transaction->t_max_wait = 0;
 	transaction->t_start = jiffies;
 	transaction->t_requested = 0;
+	transaction->t_async_fc_allowed = true;
+	transaction->t_async_fc_ongoing = false;
 }
 
 /*
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 312103fc9581..5610f16de919 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -604,6 +604,7 @@ struct transaction_s
 		T_FINISHED
 	}			t_state;
 
+	bool t_async_fc_allowed, t_async_fc_ongoing;
 	/*
 	 * Where in the log does this transaction's commit start? [no locking]
 	 */
@@ -869,6 +870,13 @@ struct journal_s
 	 */
 	wait_queue_head_t	j_wait_reserved;
 
+	/**
+	 * @j_wait_async_fc:
+	 *
+	 * Wait queue to wait for completion of async fast commits.
+	 */
+	wait_queue_head_t	j_wait_async_fc;
+
 	/**
 	 * @j_checkpoint_mutex:
 	 *
@@ -1594,6 +1602,8 @@ int jbd2_complete_transaction(journal_t *journal, tid_t tid);
 int jbd2_log_do_checkpoint(journal_t *journal);
 int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);
 int jbd2_fc_complete_commit(journal_t *journal, tid_t tid, tid_t subtid);
+int jbd2_start_async_fc(journal_t *journal, tid_t tid);
+int jbd2_stop_async_fc(journal_t *journal, tid_t tid);
 
 void __jbd2_log_wait_for_space(journal_t *journal);
 extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
-- 
2.23.0.444.g18eeb5a265-goog

Powered by blists - more mailing lists