lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250830101413.602637-4-226562783+SigAttilio@users.noreply.github.com>
Date: Sat, 30 Aug 2025 12:14:05 +0200
From: Alessio Attilio <alessio.attilio.dev@...il.com>
To: gfs2@...ts.linux.dev
Cc: linux-kernel@...r.kernel.org,
	aahringo@...hat.com,
	teigland@...hat.com,
	Alessio Attilio <226562783+SigAttilio@...rs.noreply.github.com>
Subject: [PATCH 04/12] fix(find_rsb_nodir): correct the handling of inactive rsbs and the creation of new rsbs

---
 fs/dlm/lock.c | 118 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 84 insertions(+), 34 deletions(-)

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index c62ec235c047..de77a0e9e2db 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1265,69 +1265,119 @@ static int find_rsb_nodir(struct dlm_ls *ls, const void *name, int len,
 		goto do_inactive;
 	}
 
-	/* because the rsb is active, we need to lock_rsb before
-	 * checking/changing re_master_nodeid
+	/*
+	 * rsb is active, so we can't check master_nodeid without lock_rsb.
 	 */
 
-	hold_rsb(r);
+	kref_get(&r->res_ref);
 	read_unlock_bh(&ls->ls_rsbtbl_lock);
-	lock_rsb(r);
-
-	__dlm_master_lookup(ls, r, our_nodeid, from_nodeid, false,
-			    flags, r_nodeid, result);
 
-	/* the rsb was active */
-	unlock_rsb(r);
-	put_rsb(r);
+	goto out;
 
-	return 0;
 
  do_inactive:
-	/* unlikely path - check if still part of ls_rsbtbl */
 	write_lock_bh(&ls->ls_rsbtbl_lock);
 
-	/* see comment in find_rsb_dir */
+	/* See comment in find_rsb_dir. */
 	if (rsb_flag(r, RSB_HASHED)) {
 		if (!rsb_flag(r, RSB_INACTIVE)) {
 			write_unlock_bh(&ls->ls_rsbtbl_lock);
-			/* something as changed, very unlikely but
-			 * try again
-			 */
 			goto retry;
 		}
 	} else {
 		write_unlock_bh(&ls->ls_rsbtbl_lock);
-		goto not_found;
+		goto do_new;
 	}
 
-	/* because the rsb is inactive, it's not refcounted and lock_rsb
-	   is not used, but is protected by the rsbtbl lock */
-
-	__dlm_master_lookup(ls, r, our_nodeid, from_nodeid, true, flags,
-			    r_nodeid, result);
 
-	/* A dir record rsb should never be on scan list.
-	 * Except when we are the dir and master node.
-	 * This function should only be called by the dir
-	 * node.
+	/*
+	 * rsb found inactive. No other thread is using this rsb because
+	 * it's inactive, so we can look at or update res_master_nodeid
+	 * without lock_rsb.
 	 */
-	WARN_ON(!list_empty(&r->res_scan_list) &&
-		r->res_master_nodeid != our_nodeid);
 
+	if (!recover && (r->res_master_nodeid != our_nodeid) && from_nodeid) {
+		/* our rsb is not master, and another node has sent us a
+		   request; this should never happen */
+		log_error(ls, "find_rsb inactive from_nodeid %d master %d dir %d",
+			  from_nodeid, r->res_master_nodeid, dir_nodeid);
+		write_unlock_bh(&ls->ls_rsbtbl_lock);
+		error = -ENOTBLK;
+		goto out;
+	}
+
+	if (!recover && (r->res_master_nodeid != our_nodeid) &&
+	    (dir_nodeid == our_nodeid)) {
+		/* our rsb is not master, and we are dir; may as well fix it;
+		   this should never happen */
+		log_error(ls, "find_rsb inactive our %d master %d dir %d",
+			  our_nodeid, r->res_master_nodeid, dir_nodeid);
+		dlm_print_rsb(r);
+		r->res_master_nodeid = our_nodeid;
+		r->res_nodeid = 0;
+	}
+
+	del_scan(ls, r);
+	list_move(&r->res_slow_list, &ls->ls_slow_active);
+	rsb_clear_flag(r, RSB_INACTIVE);
+	kref_init(&r->res_ref); /* ref is now used in active state */
 	write_unlock_bh(&ls->ls_rsbtbl_lock);
 
-	return 0;
+	goto out;
+
+
+ do_new:
+	/*
+	 * rsb not found
+	 */
+
+	if (error == -EBADR && !create)
+		goto out;
 
- not_found:
 	error = get_rsb_struct(ls, name, len, &r);
 	if (WARN_ON_ONCE(error))
 		goto out;
 
 	r->res_hash = hash;
-	r->res_dir_nodeid = our_nodeid;
-	r->res_master_nodeid = from_nodeid;
-	r->res_nodeid = from_nodeid;
-	rsb_set_flag(r, RSB_INACTIVE);
+	r->res_dir_nodeid = dir_nodeid;
+	kref_init(&r->res_ref);
+
+	if (from_dir) {
+		/* want to see how often this happens */
+		log_debug(ls, "find_rsb new from_dir %d recreate %s",
+			  from_nodeid, r->res_name);
+		r->res_master_nodeid = our_nodeid;
+		r->res_nodeid = 0;
+		goto out_add;
+	}
+
+	if (from_other && (dir_nodeid != our_nodeid)) {
+		/* should never happen */
+		log_error(ls, "find_rsb new from_other %d dir %d our %d %s",
+			  from_nodeid, dir_nodeid, our_nodeid, r->res_name);
+		dlm_free_rsb(r);
+		r = NULL;
+		error = -ENOTBLK;
+		goto out;
+	}
+
+	if (from_other) {
+		log_debug(ls, "find_rsb new from_other %d dir %d %s",
+			  from_nodeid, dir_nodeid, r->res_name);
+	}
+
+	if (dir_nodeid == our_nodeid) {
+		/* When we are the dir nodeid, we can set the master
+		   node immediately */
+		r->res_master_nodeid = our_nodeid;
+		r->res_nodeid = 0;
+	} else {
+		/* set_master will send_lookup to dir_nodeid */
+		r->res_master_nodeid = 0;
+		r->res_nodeid = -1;
+	}
+
+ out_add:
 
 	write_lock_bh(&ls->ls_rsbtbl_lock);
 	error = rsb_insert(r, &ls->ls_rsbtbl);
-- 
2.48.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ