lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20080307152328.GE24114@v2.random>
Date:	Fri, 7 Mar 2008 16:23:28 +0100
From:	Andrea Arcangeli <andrea@...ranet.com>
To:	Christoph Lameter <clameter@....com>
Cc:	Jack Steiner <steiner@....com>, Nick Piggin <npiggin@...e.de>,
	akpm@...ux-foundation.org, Robin Holt <holt@....com>,
	Avi Kivity <avi@...ranet.com>, kvm-devel@...ts.sourceforge.net,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	general@...ts.openfabrics.org,
	Steve Wise <swise@...ngridcomputing.com>,
	Roland Dreier <rdreier@...co.com>,
	Kanoj Sarcar <kanojsarcar@...oo.com>,
	linux-kernel@...r.kernel.org, linux-mm@...ck.org,
	daniel.blueman@...drics.com
Subject: Re: [PATCH] 3/4 combine RCU with seqlock to allow mmu notifier
	methods to sleep (#v9 was 1/4)

This combines the non-sleep-capable RCU locking of #v9 with a seqlock
so the mmu notifier fast path will require zero cacheline
writes/bouncing while still providing mmu_notifier_unregister and
allowing the mmu notifier methods to schedule. If we drop
mmu_notifier_unregister we can drop all the seqlock and
rcu_read_lock()s as well. But this locking scheme combination is sexy
enough and 100% scalable (the mmu_notifier_list cacheline will be
preloaded anyway and that will most certainly include the sequence
number value in L1 for free even in Christoph's NUMA systems), so IMHO
it is worth keeping mmu_notifier_unregister.

Signed-off-by: Andrea Arcangeli <andrea@...ranet.com>

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -10,6 +10,7 @@
 #include <linux/rbtree.h>
 #include <linux/rwsem.h>
 #include <linux/completion.h>
+#include <linux/seqlock.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
 
@@ -230,6 +231,7 @@ struct mm_struct {
 #endif
 #ifdef CONFIG_MMU_NOTIFIER
 	struct hlist_head mmu_notifier_list;
+	seqlock_t mmu_notifier_lock;
 #endif
 };
 
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -130,6 +130,7 @@ static inline void mmu_notifier_mm_init(
 static inline void mmu_notifier_mm_init(struct mm_struct *mm)
 {
 	INIT_HLIST_HEAD(&mm->mmu_notifier_list);
+	seqlock_init(&mm->mmu_notifier_lock);
 }
 
 
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -20,7 +20,9 @@ void __mmu_notifier_release(struct mm_st
 void __mmu_notifier_release(struct mm_struct *mm)
 {
 	struct mmu_notifier *mn;
+	unsigned seq;
 
+	seq = read_seqbegin(&mm->mmu_notifier_lock);
 	while (unlikely(!hlist_empty(&mm->mmu_notifier_list))) {
 		mn = hlist_entry(mm->mmu_notifier_list.first,
 				 struct mmu_notifier,
@@ -28,6 +30,7 @@ void __mmu_notifier_release(struct mm_st
 		hlist_del(&mn->hlist);
 		if (mn->ops->release)
 			mn->ops->release(mn, mm);
+		BUG_ON(read_seqretry(&mm->mmu_notifier_lock, seq));
 	}
 }
 
@@ -42,11 +45,19 @@ int __mmu_notifier_clear_flush_young(str
 	struct mmu_notifier *mn;
 	struct hlist_node *n;
 	int young = 0;
+	unsigned seq;
 
 	rcu_read_lock();
+restart:
+	seq = read_seqbegin(&mm->mmu_notifier_lock);
 	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_list, hlist) {
-		if (mn->ops->clear_flush_young)
+		if (mn->ops->clear_flush_young) {
+			rcu_read_unlock();
 			young |= mn->ops->clear_flush_young(mn, mm, address);
+			rcu_read_lock();
+		}
+		if (read_seqretry(&mm->mmu_notifier_lock, seq))
+			goto restart;
 	}
 	rcu_read_unlock();
 
@@ -58,11 +69,19 @@ void __mmu_notifier_invalidate_page(stru
 {
 	struct mmu_notifier *mn;
 	struct hlist_node *n;
+	unsigned seq;
 
 	rcu_read_lock();
+restart:
+	seq = read_seqbegin(&mm->mmu_notifier_lock);
 	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_list, hlist) {
-		if (mn->ops->invalidate_page)
+		if (mn->ops->invalidate_page) {
+			rcu_read_unlock();
 			mn->ops->invalidate_page(mn, mm, address);
+			rcu_read_lock();
+		}
+		if (read_seqretry(&mm->mmu_notifier_lock, seq))
+			goto restart;
 	}
 	rcu_read_unlock();
 }
@@ -72,11 +91,19 @@ void __mmu_notifier_invalidate_range_beg
 {
 	struct mmu_notifier *mn;
 	struct hlist_node *n;
+	unsigned seq;
 
 	rcu_read_lock();
+restart:
+	seq = read_seqbegin(&mm->mmu_notifier_lock);
 	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_list, hlist) {
-		if (mn->ops->invalidate_range_begin)
+		if (mn->ops->invalidate_range_begin) {
+			rcu_read_unlock();
 			mn->ops->invalidate_range_begin(mn, mm, start, end);
+			rcu_read_lock();
+		}
+		if (read_seqretry(&mm->mmu_notifier_lock, seq))
+			goto restart;
 	}
 	rcu_read_unlock();
 }
@@ -86,11 +113,19 @@ void __mmu_notifier_invalidate_range_end
 {
 	struct mmu_notifier *mn;
 	struct hlist_node *n;
+	unsigned seq;
 
 	rcu_read_lock();
+restart:
+	seq = read_seqbegin(&mm->mmu_notifier_lock);
 	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_list, hlist) {
-		if (mn->ops->invalidate_range_end)
+		if (mn->ops->invalidate_range_end) {
+			rcu_read_unlock();
 			mn->ops->invalidate_range_end(mn, mm, start, end);
+			rcu_read_lock();
+		}
+		if (read_seqretry(&mm->mmu_notifier_lock, seq))
+			goto restart;
 	}
 	rcu_read_unlock();
 }
@@ -103,12 +138,20 @@ void __mmu_notifier_invalidate_range_end
  */
 void mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 {
+	/* no need of seqlock for hlist_add_head_rcu */
 	hlist_add_head_rcu(&mn->hlist, &mm->mmu_notifier_list);
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_register);
 
 void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 {
+	/*
+	 * The seqlock tracks if a hlist_del_rcu happens while a
+	 * notifier method is scheduling and in such a case the "mn"
+	 * memory may have been freed by the time the method returns.
+	 */
+	write_seqlock(&mm->mmu_notifier_lock);
 	hlist_del_rcu(&mn->hlist);
+	write_sequnlock(&mm->mmu_notifier_lock);
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ