linux-kernel - [PATCH] nfs: fix the race of lock/unlock and open

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250419085709.1452492-1-lilingfeng3@huawei.com>
Date: Sat, 19 Apr 2025 16:57:09 +0800
From: Li Lingfeng <lilingfeng3@...wei.com>
To: <trondmy@...nel.org>, <anna@...nel.org>, <jlayton@...nel.org>,
	<bcodding@...hat.com>
CC: <linux-nfs@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
	<yukuai1@...weicloud.com>, <houtao1@...wei.com>, <yi.zhang@...wei.com>,
	<yangerkun@...wei.com>, <lilingfeng@...weicloud.com>,
	<lilingfeng3@...wei.com>
Subject: [PATCH] nfs: fix the race of lock/unlock and open

LOCK may extend an existing lock and release another one and UNLOCK may
also release an existing lock.
When opening a file, there may be access to file locks that have been
concurrently released by lock/unlock operations, potentially triggering
UAF.
While certain concurrent scenarios involving lock/unlock and open
operations have been safeguarded with locks – for example,
nfs4_proc_unlckz() acquires the so_delegreturn_mutex prior to invoking
locks_lock_inode_wait() – there remain cases where such protection is not
yet implemented.

The issue can be reproduced through the following steps:
T1: open in read-only mode with three consecutive lock operations applied
    lock1(0~100) --> add lock1 to file
    lock2(120~200) --> add lock2 to file
    lock3(50~150) --> extend lock1 to cover range 0~200 and release lock2
T2: restart nfs-server and run state manager
T3: open in write-only mode
    T1                            T2                                T3
                            start recover
lock1
lock2
                            nfs4_open_reclaim
                            clear_bit // NFS_DELEGATED_STATE
lock3
 _nfs4_proc_setlk
  lock so_delegreturn_mutex
  unlock so_delegreturn_mutex
  _nfs4_do_setlk
                            recover done
                                                lock so_delegreturn_mutex
                                                nfs_delegation_claim_locks
                                                get lock2
   rpc_run_task
   ...
   nfs4_lock_done
    locks_lock_inode_wait
    ...
     locks_dispose_list
     free lock2
                                                use lock2
                                                // UAF
                                                unlock so_delegreturn_mutex

Get so_delegreturn_mutex before calling locks_lock_inode_wait to fix this
issue.

Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
Signed-off-by: Li Lingfeng <lilingfeng3@...wei.com>
---
 fs/nfs/nfs4proc.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 970f28dbf253..297ee2442c02 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7112,13 +7112,16 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 		.inode = calldata->lsp->ls_state->inode,
 		.stateid = &calldata->arg.stateid,
 	};
+	struct nfs4_state_owner *sp = calldata->ctx->state->owner;
 
 	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
 		return;
 	switch (task->tk_status) {
 		case 0:
 			renew_lease(calldata->server, calldata->timestamp);
+			mutex_lock(&sp->so_delegreturn_mutex);
 			locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
+			mutex_unlock(&sp->so_delegreturn_mutex);
 			if (nfs4_update_lock_stateid(calldata->lsp,
 					&calldata->res.stateid))
 				break;
@@ -7375,6 +7378,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_lockdata *data = calldata;
 	struct nfs4_lock_state *lsp = data->lsp;
+	struct nfs4_state_owner *sp = data->ctx->state->owner;
 
 	if (!nfs4_sequence_done(task, &data->res.seq_res))
 		return;
@@ -7386,8 +7390,12 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 				data->timestamp);
 		if (data->arg.new_lock && !data->cancelled) {
 			data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
-			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
+			mutex_lock(&sp->so_delegreturn_mutex);
+			if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) {
+				mutex_unlock(&sp->so_delegreturn_mutex);
 				goto out_restart;
+			}
+			mutex_unlock(&sp->so_delegreturn_mutex);
 		}
 		if (data->arg.new_lock_owner != 0) {
 			nfs_confirm_seqid(&lsp->ls_seqid, 0);
@@ -7597,11 +7605,14 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 	int status;
 
 	request->c.flc_flags |= FL_ACCESS;
-	status = locks_lock_inode_wait(state->inode, request);
-	if (status < 0)
-		goto out;
 	mutex_lock(&sp->so_delegreturn_mutex);
 	down_read(&nfsi->rwsem);
+	status = locks_lock_inode_wait(state->inode, request);
+	if (status < 0) {
+		up_read(&nfsi->rwsem);
+		mutex_unlock(&sp->so_delegreturn_mutex);
+		goto out;
+	}
 	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
 		/* Yes: cache locks! */
 		/* ...but avoid races with delegation recall... */
-- 
2.31.1