[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241025204008.4076565-13-dhowells@redhat.com>
Date: Fri, 25 Oct 2024 21:39:39 +0100
From: David Howells <dhowells@...hat.com>
To: Christian Brauner <christian@...uner.io>,
Steve French <smfrench@...il.com>,
Matthew Wilcox <willy@...radead.org>
Cc: David Howells <dhowells@...hat.com>,
Jeff Layton <jlayton@...nel.org>,
Gao Xiang <hsiangkao@...ux.alibaba.com>,
Dominique Martinet <asmadeus@...ewreck.org>,
Marc Dionne <marc.dionne@...istor.com>,
Paulo Alcantara <pc@...guebit.com>,
Shyam Prasad N <sprasad@...rosoft.com>,
Tom Talpey <tom@...pey.com>,
Eric Van Hensbergen <ericvh@...nel.org>,
Ilya Dryomov <idryomov@...il.com>,
netfs@...ts.linux.dev,
linux-afs@...ts.infradead.org,
linux-cifs@...r.kernel.org,
linux-nfs@...r.kernel.org,
ceph-devel@...r.kernel.org,
v9fs@...ts.linux.dev,
linux-erofs@...ts.ozlabs.org,
linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org,
netdev@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH v2 12/31] afs: Don't use mutex for I/O operation lock
Don't use the standard mutex for the I/O operation lock; implement our own
instead, because a standard mutex must be released by the same thread that
locked it. This is a problem for async FetchData, where the lock will be
dropped from the workqueue that processed the incoming data rather than
from the issuing thread.
Signed-off-by: David Howells <dhowells@...hat.com>
cc: Marc Dionne <marc.dionne@...istor.com>
cc: linux-afs@...ts.infradead.org
---
fs/afs/fs_operation.c | 111 +++++++++++++++++++++++++++++++++++++++---
fs/afs/internal.h | 3 +-
fs/afs/super.c | 2 +-
3 files changed, 108 insertions(+), 8 deletions(-)
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index 428721bbe4f6..8488ff8183fa 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -49,6 +49,105 @@ struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *vo
return op;
}
+struct afs_io_locker { /* Record of one task waiting for a vnode's I/O lock */
+ struct list_head link; /* Link in vnode->io_lock_waiters */
+ struct task_struct *task; /* Task woken when the lock is handed over */
+ unsigned long have_lock; /* Set to 1 by afs_unlock_for_io() on handover */
+};
+
+/*
+ * Unlock the I/O lock on a vnode.
+ *
+ * If a task is queued on vnode->io_lock_waiters, ownership is handed straight
+ * to the first waiter: AFS_VNODE_IO_LOCK stays set, the waiter's have_lock
+ * flag is set and the waiter is woken.  If nobody is waiting, the
+ * AFS_VNODE_IO_LOCK bit is cleared instead.
+ *
+ * Unlike a mutex, this may be called from a different thread to the one that
+ * took the lock.
+ */
+static void afs_unlock_for_io(struct afs_vnode *vnode)
+{
+	struct afs_io_locker *locker;
+	struct task_struct *task;
+
+	spin_lock(&vnode->lock);
+	locker = list_first_entry_or_null(&vnode->io_lock_waiters,
+					  struct afs_io_locker, link);
+	if (locker) {
+		/* The waiter record lives on the waiter's stack and the waiter
+		 * may return as soon as it sees have_lock set (it does not
+		 * retake vnode->lock first), so fetch everything we need from
+		 * the record before publishing the flag and don't touch the
+		 * record afterwards.
+		 *
+		 * NOTE(review): this does not take a reference on the task
+		 * itself; confirm that the task cannot go away before
+		 * wake_up_process() or pin it (e.g. with a wake_q).
+		 */
+		task = locker->task;
+		list_del(&locker->link);
+		smp_store_release(&locker->have_lock, 1);
+		smp_mb__after_atomic(); /* Store have_lock before task state */
+		wake_up_process(task);
+	} else {
+		clear_bit(AFS_VNODE_IO_LOCK, &vnode->flags);
+	}
+	spin_unlock(&vnode->lock);
+}
+
+/*
+ * Lock the I/O lock on a vnode uninterruptibly. We can't use an ordinary
+ * mutex as lockdep will complain if we unlock it in the wrong thread.
+ *
+ * If the AFS_VNODE_IO_LOCK bit is clear, it is set and the lock is ours
+ * immediately. Otherwise an on-stack afs_io_locker record is queued at the
+ * tail of vnode->io_lock_waiters and the caller sleeps until the have_lock
+ * flag in that record is set, which afs_unlock_for_io() does when it hands
+ * the lock over.
+ */
+static void afs_lock_for_io(struct afs_vnode *vnode)
+{
+ struct afs_io_locker myself = { .task = current, }; /* On-stack wait record */
+
+ spin_lock(&vnode->lock);
+
+ if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
+ spin_unlock(&vnode->lock); /* Uncontended: the bit was clear and is now set */
+ return;
+ }
+
+ list_add_tail(&myself.link, &vnode->io_lock_waiters); /* Join the back of the queue */
+ spin_unlock(&vnode->lock);
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE); /* Set the state before testing the flag */
+ if (smp_load_acquire(&myself.have_lock)) /* Pairs with the store-release in afs_unlock_for_io() */
+ break;
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+}
+
+/*
+ * Lock the I/O lock on a vnode interruptibly. We can't use an ordinary mutex
+ * as lockdep will complain if we unlock it in the wrong thread.
+ *
+ * Returns 0 if the lock was obtained. If the lock was contended and a signal
+ * is pending once the wait ends, -ERESTARTSYS is returned and the lock is not
+ * held on return: either the wait record is removed from the queue or, if the
+ * lock had already been handed to us, it is released again through
+ * afs_unlock_for_io().
+ */
+static int afs_lock_for_io_interruptible(struct afs_vnode *vnode)
+{
+ struct afs_io_locker myself = { .task = current, }; /* On-stack wait record */
+ int ret = 0;
+
+ spin_lock(&vnode->lock);
+
+ if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
+ spin_unlock(&vnode->lock); /* Uncontended: the bit was clear and is now set */
+ return 0;
+ }
+
+ list_add_tail(&myself.link, &vnode->io_lock_waiters); /* Join the back of the queue */
+ spin_unlock(&vnode->lock);
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE); /* Set the state before testing the conditions */
+ if (smp_load_acquire(&myself.have_lock) ||
+ signal_pending(current))
+ break;
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ /* If we got a signal, try to transfer the lock onto the next
+ * waiter. have_lock is rechecked under vnode->lock because
+ * afs_unlock_for_io() sets it with that lock held, so the handover
+ * cannot race with our removal from the queue.
+ */
+ if (unlikely(signal_pending(current))) {
+ spin_lock(&vnode->lock);
+ if (myself.have_lock) { /* Handed to us already; we are off the list */
+ spin_unlock(&vnode->lock);
+ afs_unlock_for_io(vnode); /* Takes vnode->lock itself, hence the unlock above */
+ } else {
+ list_del(&myself.link); /* Still queued: withdraw our wait record */
+ spin_unlock(&vnode->lock);
+ }
+ ret = -ERESTARTSYS;
+ }
+ return ret;
+}
+
/*
* Lock the vnode(s) being operated upon.
*/
@@ -60,7 +159,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
_enter("");
if (op->flags & AFS_OPERATION_UNINTR) {
- mutex_lock(&vnode->io_lock);
+ afs_lock_for_io(vnode);
op->flags |= AFS_OPERATION_LOCK_0;
_leave(" = t [1]");
return true;
@@ -72,7 +171,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
if (vnode2 > vnode)
swap(vnode, vnode2);
- if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
+ if (afs_lock_for_io_interruptible(vnode) < 0) {
afs_op_set_error(op, -ERESTARTSYS);
op->flags |= AFS_OPERATION_STOP;
_leave(" = f [I 0]");
@@ -81,10 +180,10 @@ static bool afs_get_io_locks(struct afs_operation *op)
op->flags |= AFS_OPERATION_LOCK_0;
if (vnode2) {
- if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) {
+ if (afs_lock_for_io_interruptible(vnode2) < 0) {
afs_op_set_error(op, -ERESTARTSYS);
op->flags |= AFS_OPERATION_STOP;
- mutex_unlock(&vnode->io_lock);
+ afs_unlock_for_io(vnode);
op->flags &= ~AFS_OPERATION_LOCK_0;
_leave(" = f [I 1]");
return false;
@@ -104,9 +203,9 @@ static void afs_drop_io_locks(struct afs_operation *op)
_enter("");
if (op->flags & AFS_OPERATION_LOCK_1)
- mutex_unlock(&vnode2->io_lock);
+ afs_unlock_for_io(vnode2);
if (op->flags & AFS_OPERATION_LOCK_0)
- mutex_unlock(&vnode->io_lock);
+ afs_unlock_for_io(vnode);
}
static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param *vp,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index c9d620175e80..07b8f7083e73 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -702,13 +702,14 @@ struct afs_vnode {
struct afs_file_status status; /* AFS status info for this file */
afs_dataversion_t invalid_before; /* Child dentries are invalid before this */
struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
- struct mutex io_lock; /* Lock for serialising I/O on this mutex */
+ struct list_head io_lock_waiters; /* Threads waiting for the I/O lock */
struct rw_semaphore validate_lock; /* lock for validating this vnode */
struct rw_semaphore rmdir_lock; /* Lock for rmdir vs sillyrename */
struct key *silly_key; /* Silly rename key */
spinlock_t wb_lock; /* lock for wb_keys */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
+#define AFS_VNODE_IO_LOCK 0 /* Set if the I/O serialisation lock is held */
#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
#define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */
#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index f3ba1c3e72f5..7631302c1984 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -663,7 +663,7 @@ static void afs_i_init_once(void *_vnode)
memset(vnode, 0, sizeof(*vnode));
inode_init_once(&vnode->netfs.inode);
- mutex_init(&vnode->io_lock);
+ INIT_LIST_HEAD(&vnode->io_lock_waiters);
init_rwsem(&vnode->validate_lock);
spin_lock_init(&vnode->wb_lock);
spin_lock_init(&vnode->lock);
Powered by blists - more mailing lists