lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240925143154.2322926-4-ranxiaokai627@163.com>
Date: Wed, 25 Sep 2024 14:31:53 +0000
From: ran xiaokai <ranxiaokai627@....com>
To: elver@...gle.com,
	tglx@...utronix.de,
	dvyukov@...gle.com
Cc: kasan-dev@...glegroups.com,
	linux-kernel@...r.kernel.org,
	Ran Xiaokai <ran.xiaokai@....com.cn>
Subject: [PATCH 3/4] kcsan, debugfs: fix atomic sleep by converting spinlock_t to rcu lock

From: Ran Xiaokai <ran.xiaokai@....com.cn>

In a preempt-RT kernel, most of the irq handlers have been
converted to threaded mode, except for those which have the
IRQF_NO_THREAD flag set. The hrtimer IRQ is one such example.
A KCSAN report can therefore be triggered from hard-irq context,
which triggers the "sleeping function called from invalid context" bug:

[    C1] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
[    C1] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1
[    C1] preempt_count: 10002, expected: 0
[    C1] RCU nest depth: 0, expected: 0
[    C1] no locks held by swapper/1/0.
[    C1] irq event stamp: 156674
[    C1] hardirqs last  enabled at (156673): [<ffffffff81130bd9>] do_idle+0x1f9/0x240
[    C1] hardirqs last disabled at (156674): [<ffffffff82254f84>] sysvec_apic_timer_interrupt+0x14/0xc0
[    C1] softirqs last  enabled at (0): [<ffffffff81099f47>] copy_process+0xfc7/0x4b60
[    C1] softirqs last disabled at (0): [<0000000000000000>] 0x0
[    C1] Preemption disabled at:
[    C1] [<ffffffff814a3e2a>] paint_ptr+0x2a/0x90
[    C1] CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Not tainted 6.11.0+ #3
[    C1] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014
[    C1] Call Trace:
[    C1]  <IRQ>
[    C1]  dump_stack_lvl+0x7e/0xc0
[    C1]  dump_stack+0x1d/0x30
[    C1]  __might_resched+0x1a2/0x270
[    C1]  rt_spin_lock+0x68/0x170
[    C1]  ? kcsan_skip_report_debugfs+0x43/0xe0
[    C1]  kcsan_skip_report_debugfs+0x43/0xe0
[    C1]  ? hrtimer_next_event_without+0x110/0x110
[    C1]  print_report+0xb5/0x590
[    C1]  kcsan_report_known_origin+0x1b1/0x1d0
[    C1]  kcsan_setup_watchpoint+0x348/0x650
[    C1]  __tsan_unaligned_write1+0x16d/0x1d0
[    C1]  hrtimer_interrupt+0x3d6/0x430
[    C1]  __sysvec_apic_timer_interrupt+0xe8/0x3a0
[    C1]  sysvec_apic_timer_interrupt+0x97/0xc0
[    C1]  </IRQ>

To fix this, we cannot simply convert report_filterlist_lock
to a raw_spinlock_t, because of the following call chain in the
insert_report_filterlist() path:

raw_spin_lock_irqsave(&report_filterlist_lock, flags);
  krealloc
    __do_kmalloc_node
      slab_alloc_node
        __slab_alloc
          local_lock_irqsave(&s->cpu_slab->lock, flags)

On a preempt-RT kernel, local_lock_t is implemented as a spinlock_t,
which is sleepable, so kmalloc() and similar functions cannot be
called with a raw_spinlock_t held.

Instead, convert the lock to RCU to fix this.
Also introduce a mutex to serialize user-space write operations.

Signed-off-by: Ran Xiaokai <ran.xiaokai@....com.cn>
---
 kernel/kcsan/debugfs.c | 172 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 113 insertions(+), 59 deletions(-)

diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c
index 30547507f497..d5e624c37125 100644
--- a/kernel/kcsan/debugfs.c
+++ b/kernel/kcsan/debugfs.c
@@ -40,20 +40,17 @@ static_assert(ARRAY_SIZE(counter_names) == KCSAN_COUNTER_COUNT);
  * Addresses for filtering functions from reporting. This list can be used as a
  * whitelist or blacklist.
  */
-static struct {
+struct report_filterlist {
 	unsigned long	*addrs;		/* array of addresses */
 	size_t		size;		/* current size */
 	int		used;		/* number of elements used */
 	bool		sorted;		/* if elements are sorted */
 	bool		whitelist;	/* if list is a blacklist or whitelist */
-} report_filterlist = {
-	.addrs		= NULL,
-	.size		= 8,		/* small initial size */
-	.used		= 0,
-	.sorted		= false,
-	.whitelist	= false,	/* default is blacklist */
+	struct rcu_head rcu;
 };
-static DEFINE_SPINLOCK(report_filterlist_lock);
+
+static DEFINE_MUTEX(rp_flist_mutex);
+static struct report_filterlist __rcu *rp_flist;
 
 /*
  * The microbenchmark allows benchmarking KCSAN core runtime only. To run
@@ -103,98 +100,141 @@ static int cmp_filterlist_addrs(const void *rhs, const void *lhs)
 bool kcsan_skip_report_debugfs(unsigned long func_addr)
 {
 	unsigned long symbolsize, offset;
-	unsigned long flags;
 	bool ret = false;
+	struct report_filterlist *list;
 
 	if (!kallsyms_lookup_size_offset(func_addr, &symbolsize, &offset))
 		return false;
 	func_addr -= offset; /* Get function start */
 
-	spin_lock_irqsave(&report_filterlist_lock, flags);
-	if (report_filterlist.used == 0)
+	rcu_read_lock();
+	list = rcu_dereference(rp_flist);
+
+	if (!list)
+		goto out;
+
+	if (list->used == 0)
 		goto out;
 
 	/* Sort array if it is unsorted, and then do a binary search. */
-	if (!report_filterlist.sorted) {
-		sort(report_filterlist.addrs, report_filterlist.used,
+	if (!list->sorted) {
+		sort(list->addrs, list->used,
 		     sizeof(unsigned long), cmp_filterlist_addrs, NULL);
-		report_filterlist.sorted = true;
+		list->sorted = true;
 	}
-	ret = !!bsearch(&func_addr, report_filterlist.addrs,
-			report_filterlist.used, sizeof(unsigned long),
+	ret = !!bsearch(&func_addr, list->addrs,
+			list->used, sizeof(unsigned long),
 			cmp_filterlist_addrs);
-	if (report_filterlist.whitelist)
+	if (list->whitelist)
 		ret = !ret;
 
 out:
-	spin_unlock_irqrestore(&report_filterlist_lock, flags);
+	rcu_read_unlock();
 	return ret;
 }
 
 static ssize_t set_report_filterlist_whitelist(bool whitelist)
 {
-	unsigned long flags;
+	struct report_filterlist *new_list, *old_list;
+	ssize_t ret = 0;
 
-	spin_lock_irqsave(&report_filterlist_lock, flags);
-	report_filterlist.whitelist = whitelist;
-	spin_unlock_irqrestore(&report_filterlist_lock, flags);
-	return 0;
+	mutex_lock(&rp_flist_mutex);
+	old_list = rcu_dereference_protected(rp_flist,
+					   lockdep_is_held(&rp_flist_mutex));
+
+	new_list = kzalloc(sizeof(*new_list), GFP_KERNEL);
+	if (!new_list) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	memcpy(new_list, old_list, sizeof(struct report_filterlist));
+	new_list->whitelist = whitelist;
+
+	rcu_assign_pointer(rp_flist, new_list);
+	synchronize_rcu();
+	kfree(old_list);
+
+out:
+	mutex_unlock(&rp_flist_mutex);
+	return ret;
 }
 
 /* Returns 0 on success, error-code otherwise. */
 static ssize_t insert_report_filterlist(const char *func)
 {
-	unsigned long flags;
 	unsigned long addr = kallsyms_lookup_name(func);
 	ssize_t ret = 0;
+	struct report_filterlist *new_list, *old_list;
+	unsigned long *new_addrs;
 
 	if (!addr) {
 		pr_err("could not find function: '%s'\n", func);
 		return -ENOENT;
 	}
 
-	spin_lock_irqsave(&report_filterlist_lock, flags);
+	new_list = kzalloc(sizeof(*new_list), GFP_KERNEL);
+	if (!new_list)
+		return -ENOMEM;
+
+	mutex_lock(&rp_flist_mutex);
+	old_list = rcu_dereference_protected(rp_flist,
+					   lockdep_is_held(&rp_flist_mutex));
+	memcpy(new_list, old_list, sizeof(struct report_filterlist));
 
-	if (report_filterlist.addrs == NULL) {
+	if (new_list->addrs == NULL) {
 		/* initial allocation */
-		report_filterlist.addrs =
-			kmalloc_array(report_filterlist.size,
-				      sizeof(unsigned long), GFP_ATOMIC);
-		if (report_filterlist.addrs == NULL) {
-			ret = -ENOMEM;
-			goto out;
-		}
-	} else if (report_filterlist.used == report_filterlist.size) {
+		new_list->addrs =
+			kmalloc_array(new_list->size,
+					  sizeof(unsigned long), GFP_KERNEL);
+		if (new_list->addrs == NULL)
+			goto out_free;
+	} else if (new_list->used == new_list->size) {
 		/* resize filterlist */
-		size_t new_size = report_filterlist.size * 2;
-		unsigned long *new_addrs =
-			krealloc(report_filterlist.addrs,
-				 new_size * sizeof(unsigned long), GFP_ATOMIC);
-
-		if (new_addrs == NULL) {
-			/* leave filterlist itself untouched */
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		report_filterlist.size = new_size;
-		report_filterlist.addrs = new_addrs;
+		size_t new_size = new_list->size * 2;
+
+		new_addrs = kmalloc_array(new_size,
+					  sizeof(unsigned long), GFP_KERNEL);
+		if (new_addrs == NULL)
+			goto out_free;
+
+		memcpy(new_addrs, old_list->addrs,
+				old_list->size * sizeof(unsigned long));
+		new_list->size = new_size;
+		new_list->addrs = new_addrs;
+	} else {
+		new_addrs = kmalloc_array(new_list->size,
+					  sizeof(unsigned long), GFP_KERNEL);
+		if (new_addrs == NULL)
+			goto out_free;
+
+		memcpy(new_addrs, old_list->addrs,
+				old_list->size * sizeof(unsigned long));
+		new_list->addrs = new_addrs;
 	}
 
 	/* Note: deduplicating should be done in userspace. */
-	report_filterlist.addrs[report_filterlist.used++] = addr;
-	report_filterlist.sorted = false;
-
+	new_list->addrs[new_list->used++] = addr;
+	new_list->sorted = false;
+
+	rcu_assign_pointer(rp_flist, new_list);
+	synchronize_rcu();
+	kfree(old_list->addrs);
+	kfree(old_list);
+	goto out;
+
+out_free:
+	ret = -ENOMEM;
+	kfree(new_list);
 out:
-	spin_unlock_irqrestore(&report_filterlist_lock, flags);
-
+	mutex_unlock(&rp_flist_mutex);
 	return ret;
 }
 
 static int show_info(struct seq_file *file, void *v)
 {
 	int i;
-	unsigned long flags;
+	struct report_filterlist *list;
 
 	/* show stats */
 	seq_printf(file, "enabled: %i\n", READ_ONCE(kcsan_enabled));
@@ -203,14 +243,15 @@ static int show_info(struct seq_file *file, void *v)
 			   atomic_long_read(&kcsan_counters[i]));
 	}
 
+	rcu_read_lock();
+	list = rcu_dereference(rp_flist);
 	/* show filter functions, and filter type */
-	spin_lock_irqsave(&report_filterlist_lock, flags);
 	seq_printf(file, "\n%s functions: %s\n",
-		   report_filterlist.whitelist ? "whitelisted" : "blacklisted",
-		   report_filterlist.used == 0 ? "none" : "");
-	for (i = 0; i < report_filterlist.used; ++i)
-		seq_printf(file, " %ps\n", (void *)report_filterlist.addrs[i]);
-	spin_unlock_irqrestore(&report_filterlist_lock, flags);
+		   list->whitelist ? "whitelisted" : "blacklisted",
+		   list->used == 0 ? "none" : "");
+	for (i = 0; i < list->used; ++i)
+		seq_printf(file, " %ps\n", (void *)list->addrs[i]);
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -269,6 +310,19 @@ static const struct file_operations debugfs_ops =
 
 static int __init kcsan_debugfs_init(void)
 {
+	struct report_filterlist *list;
+
+	list = kzalloc(sizeof(*list), GFP_KERNEL);
+	if (!list)
+		return -ENOMEM;
+
+	list->addrs		= NULL;
+	list->size		= 8;		/* small initial size */
+	list->used		= 0;
+	list->sorted	= false;
+	list->whitelist	= false;	/* default is blacklist */
+	rcu_assign_pointer(rp_flist, list);
+
 	debugfs_create_file("kcsan", 0644, NULL, NULL, &debugfs_ops);
 	return 0;
 }
-- 
2.15.2



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ