[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250830101413.602637-4-226562783+SigAttilio@users.noreply.github.com>
Date: Sat, 30 Aug 2025 12:14:05 +0200
From: Alessio Attilio <alessio.attilio.dev@...il.com>
To: gfs2@...ts.linux.dev
Cc: linux-kernel@...r.kernel.org,
aahringo@...hat.com,
teigland@...hat.com,
Alessio Attilio <226562783+SigAttilio@...rs.noreply.github.com>
Subject: [PATCH 04/12] fix(find_rsb_nodir): correct the handling of inactive rsbs and the creation of new rsbs
---
fs/dlm/lock.c | 118 +++++++++++++++++++++++++++++++++++---------------
1 file changed, 84 insertions(+), 34 deletions(-)
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index c62ec235c047..de77a0e9e2db 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1265,69 +1265,119 @@ static int find_rsb_nodir(struct dlm_ls *ls, const void *name, int len,
goto do_inactive;
}
- /* because the rsb is active, we need to lock_rsb before
- * checking/changing re_master_nodeid
+ /*
+ * rsb is active, so we can't check master_nodeid without lock_rsb.
*/
- hold_rsb(r);
+ kref_get(&r->res_ref);
read_unlock_bh(&ls->ls_rsbtbl_lock);
- lock_rsb(r);
-
- __dlm_master_lookup(ls, r, our_nodeid, from_nodeid, false,
- flags, r_nodeid, result);
- /* the rsb was active */
- unlock_rsb(r);
- put_rsb(r);
+ goto out;
- return 0;
do_inactive:
- /* unlikely path - check if still part of ls_rsbtbl */
write_lock_bh(&ls->ls_rsbtbl_lock);
- /* see comment in find_rsb_dir */
+ /* See comment in find_rsb_dir. */
if (rsb_flag(r, RSB_HASHED)) {
if (!rsb_flag(r, RSB_INACTIVE)) {
write_unlock_bh(&ls->ls_rsbtbl_lock);
- /* something as changed, very unlikely but
- * try again
- */
goto retry;
}
} else {
write_unlock_bh(&ls->ls_rsbtbl_lock);
- goto not_found;
+ goto do_new;
}
- /* because the rsb is inactive, it's not refcounted and lock_rsb
- is not used, but is protected by the rsbtbl lock */
-
- __dlm_master_lookup(ls, r, our_nodeid, from_nodeid, true, flags,
- r_nodeid, result);
- /* A dir record rsb should never be on scan list.
- * Except when we are the dir and master node.
- * This function should only be called by the dir
- * node.
+ /*
+ * rsb found inactive. No other thread is using this rsb because
+ * it's inactive, so we can look at or update res_master_nodeid
+ * without lock_rsb.
*/
- WARN_ON(!list_empty(&r->res_scan_list) &&
- r->res_master_nodeid != our_nodeid);
+ if (!recover && (r->res_master_nodeid != our_nodeid) && from_nodeid) {
+ /* our rsb is not master, and another node has sent us a
+ request; this should never happen */
+ log_error(ls, "find_rsb inactive from_nodeid %d master %d dir %d",
+ from_nodeid, r->res_master_nodeid, dir_nodeid);
+ write_unlock_bh(&ls->ls_rsbtbl_lock);
+ error = -ENOTBLK;
+ goto out;
+ }
+
+ if (!recover && (r->res_master_nodeid != our_nodeid) &&
+ (dir_nodeid == our_nodeid)) {
+ /* our rsb is not master, and we are dir; may as well fix it;
+ this should never happen */
+ log_error(ls, "find_rsb inactive our %d master %d dir %d",
+ our_nodeid, r->res_master_nodeid, dir_nodeid);
+ dlm_print_rsb(r);
+ r->res_master_nodeid = our_nodeid;
+ r->res_nodeid = 0;
+ }
+
+ del_scan(ls, r);
+ list_move(&r->res_slow_list, &ls->ls_slow_active);
+ rsb_clear_flag(r, RSB_INACTIVE);
+ kref_init(&r->res_ref); /* ref is now used in active state */
write_unlock_bh(&ls->ls_rsbtbl_lock);
- return 0;
+ goto out;
+
+
+ do_new:
+ /*
+ * rsb not found
+ */
+
+ if (error == -EBADR && !create)
+ goto out;
- not_found:
error = get_rsb_struct(ls, name, len, &r);
if (WARN_ON_ONCE(error))
goto out;
r->res_hash = hash;
- r->res_dir_nodeid = our_nodeid;
- r->res_master_nodeid = from_nodeid;
- r->res_nodeid = from_nodeid;
- rsb_set_flag(r, RSB_INACTIVE);
+ r->res_dir_nodeid = dir_nodeid;
+ kref_init(&r->res_ref);
+
+ if (from_dir) {
+ /* want to see how often this happens */
+ log_debug(ls, "find_rsb new from_dir %d recreate %s",
+ from_nodeid, r->res_name);
+ r->res_master_nodeid = our_nodeid;
+ r->res_nodeid = 0;
+ goto out_add;
+ }
+
+ if (from_other && (dir_nodeid != our_nodeid)) {
+ /* should never happen */
+ log_error(ls, "find_rsb new from_other %d dir %d our %d %s",
+ from_nodeid, dir_nodeid, our_nodeid, r->res_name);
+ dlm_free_rsb(r);
+ r = NULL;
+ error = -ENOTBLK;
+ goto out;
+ }
+
+ if (from_other) {
+ log_debug(ls, "find_rsb new from_other %d dir %d %s",
+ from_nodeid, dir_nodeid, r->res_name);
+ }
+
+ if (dir_nodeid == our_nodeid) {
+ /* When we are the dir nodeid, we can set the master
+ node immediately */
+ r->res_master_nodeid = our_nodeid;
+ r->res_nodeid = 0;
+ } else {
+ /* set_master will send_lookup to dir_nodeid */
+ r->res_master_nodeid = 0;
+ r->res_nodeid = -1;
+ }
+
+ out_add:
write_lock_bh(&ls->ls_rsbtbl_lock);
error = rsb_insert(r, &ls->ls_rsbtbl);
--
2.48.1
Powered by blists - more mailing lists