Message-Id: <20241008062639.2632455-1-ranxiaokai627@163.com>
Date: Tue,  8 Oct 2024 06:26:39 +0000
From: Ran Xiaokai <ranxiaokai627@....com>
To: elver@...gle.com
Cc: dvyukov@...gle.com,
	kasan-dev@...glegroups.com,
	linux-kernel@...r.kernel.org,
	ran.xiaokai@....com.cn,
	ranxiaokai627@....com,
	tglx@...utronix.de
Subject: Re: [PATCH 3/4] kcsan, debugfs: fix atomic sleep by converting spinlock_t to rcu lock

>> -	spin_lock_irqsave(&report_filterlist_lock, flags);
>> -	if (report_filterlist.used == 0)
>> +	rcu_read_lock();
>> +	list = rcu_dereference(rp_flist);
>> +
>> +	if (!list)
>> +		goto out;
>> +
>> +	if (list->used == 0)
>>  		goto out;
>>  
>>  	/* Sort array if it is unsorted, and then do a binary search. */
>> -	if (!report_filterlist.sorted) {
>> -		sort(report_filterlist.addrs, report_filterlist.used,
>> +	if (!list->sorted) {
>> +		sort(list->addrs, list->used,
>>  		     sizeof(unsigned long), cmp_filterlist_addrs, NULL);
>> -		report_filterlist.sorted = true;
>> +		list->sorted = true;
>>  	}
>
>This used to be under the report_filterlist_lock, but now there's no
>protection against this happening concurrently.
>
>Sure, at the moment this is not a problem, because this function is
>only called under report_lock, which serializes it. Is that intended?
>
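True, at the moment kcsan_skip_report_debugfs() is only reached from the
reporting path under report_lock. If the lazy sort on the read side were a
concern, one option is to keep readers strictly read-only and sort on the
update side instead; a rough sketch against my patch (untested, writer still
publishes under rp_flist_mutex):

	/* Update side, before publishing the new list: */
	sort(new_list->addrs, new_list->used,
	     sizeof(unsigned long), cmp_filterlist_addrs, NULL);
	new_list->sorted = true;
	rcu_assign_pointer(rp_flist, new_list);

That way rcu_read_lock() readers never modify the shared list.
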
>> -	ret = !!bsearch(&func_addr, report_filterlist.addrs,
>> -			report_filterlist.used, sizeof(unsigned long),
>> +	ret = !!bsearch(&func_addr, list->addrs,
>> +			list->used, sizeof(unsigned long),
>>  			cmp_filterlist_addrs);
>> -	if (report_filterlist.whitelist)
>> +	if (list->whitelist)
>>  		ret = !ret;
>[...]
>> +
>> +	memcpy(new_list, old_list, sizeof(struct report_filterlist));
>> +	new_list->whitelist = whitelist;
>> +
>> +	rcu_assign_pointer(rp_flist, new_list);
>> +	synchronize_rcu();
>> +	kfree(old_list);
>
>Why not kfree_rcu()?
>
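Right, kfree_rcu() would avoid blocking the writer in synchronize_rcu().
A minimal sketch, assuming struct report_filterlist gains a struct rcu_head
member (hypothetical field name "rcu"):

	struct report_filterlist {
		unsigned long	*addrs;		/* addresses */
		size_t		size;		/* size of array */
		int		used;		/* number of elements used */
		bool		sorted;		/* if elements are sorted */
		bool		whitelist;	/* if list is a blacklist or whitelist */
		struct rcu_head	rcu;		/* hypothetical: for kfree_rcu() */
	};

	rcu_assign_pointer(rp_flist, new_list);
	kfree_rcu(old_list, rcu);	/* freed after a grace period, no blocking */
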
>> +out:
>> +	mutex_unlock(&rp_flist_mutex);
>> +	return ret;
>>  }
>[...]
>> +	} else {
>> +		new_addrs = kmalloc_array(new_list->size,
>> +					  sizeof(unsigned long), GFP_KERNEL);
>> +		if (new_addrs == NULL)
>> +			goto out_free;
>> +
>> +		memcpy(new_addrs, old_list->addrs,
>> +				old_list->size * sizeof(unsigned long));
>> +		new_list->addrs = new_addrs;
>>  	}
>
>Wait, for every insertion it ends up copying the list now? That's very
>wasteful.
>
>In general, this solution seems overly complex, esp. the part where it
>ends up copying the whole list on _every_ insertion.
>
>If the whole point is to avoid kmalloc() under the lock, we can do
>something much simpler.
>
>Please test the patch below - it's much simpler, and in the common case
>I expect it to rarely throw away the preemptive allocation done outside
>the critical section, because concurrent insertions by the user should
>be rare.

I have tested this, and it works.
Yes, this patch is much simpler.
Another consideration for converting the spinlock to an RCU lock was that
it would reduce IRQ latency when kcsan_skip_report_debugfs() is called from
hard-IRQ context, but as you said, insertions by the user should not be a
frequent operation, so this should not be a problem.
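
(For context, these insertions come from the debugfs interface; per
Documentation/dev-tools/kcsan.rst, something like

	echo whitelist > /sys/kernel/debug/kcsan
	echo '!sys_read' >> /sys/kernel/debug/kcsan

so they are user-driven and naturally infrequent.)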

>Thanks,
>-- Marco
>
>------ >8 ------
>
>From: Marco Elver <elver@...gle.com>
>Date: Tue, 1 Oct 2024 16:00:45 +0200
>Subject: [PATCH] kcsan: turn report_filterlist_lock into a raw_spinlock
>
><tbd... please test>
>
>Reported-by: Ran Xiaokai <ran.xiaokai@....com.cn>
>Signed-off-by: Marco Elver <elver@...gle.com>
>---
> kernel/kcsan/debugfs.c | 76 +++++++++++++++++++++---------------------
> 1 file changed, 38 insertions(+), 38 deletions(-)
>
>diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c
>index 1d1d1b0e4248..5ffb6cc5298b 100644
>--- a/kernel/kcsan/debugfs.c
>+++ b/kernel/kcsan/debugfs.c
>@@ -46,14 +46,8 @@ static struct {
> 	int		used;		/* number of elements used */
> 	bool		sorted;		/* if elements are sorted */
> 	bool		whitelist;	/* if list is a blacklist or whitelist */
>-} report_filterlist = {
>-	.addrs		= NULL,
>-	.size		= 8,		/* small initial size */
>-	.used		= 0,
>-	.sorted		= false,
>-	.whitelist	= false,	/* default is blacklist */
>-};
>-static DEFINE_SPINLOCK(report_filterlist_lock);
>+} report_filterlist;
>+static DEFINE_RAW_SPINLOCK(report_filterlist_lock);
> 
> /*
>  * The microbenchmark allows benchmarking KCSAN core runtime only. To run
>@@ -110,7 +104,7 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr)
> 		return false;
> 	func_addr -= offset; /* Get function start */
> 
>-	spin_lock_irqsave(&report_filterlist_lock, flags);
>+	raw_spin_lock_irqsave(&report_filterlist_lock, flags);
> 	if (report_filterlist.used == 0)
> 		goto out;
> 
>@@ -127,7 +121,7 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr)
> 		ret = !ret;
> 
> out:
>-	spin_unlock_irqrestore(&report_filterlist_lock, flags);
>+	raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
> 	return ret;
> }
> 
>@@ -135,9 +129,9 @@ static void set_report_filterlist_whitelist(bool whitelist)
> {
> 	unsigned long flags;
> 
>-	spin_lock_irqsave(&report_filterlist_lock, flags);
>+	raw_spin_lock_irqsave(&report_filterlist_lock, flags);
> 	report_filterlist.whitelist = whitelist;
>-	spin_unlock_irqrestore(&report_filterlist_lock, flags);
>+	raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
> }
> 
> /* Returns 0 on success, error-code otherwise. */
>@@ -145,6 +139,9 @@ static ssize_t insert_report_filterlist(const char *func)
> {
> 	unsigned long flags;
> 	unsigned long addr = kallsyms_lookup_name(func);
>+	unsigned long *delay_free = NULL;
>+	unsigned long *new_addrs = NULL;
>+	size_t new_size = 0;
> 	ssize_t ret = 0;
> 
> 	if (!addr) {
>@@ -152,32 +149,33 @@ static ssize_t insert_report_filterlist(const char *func)
> 		return -ENOENT;
> 	}
> 
>-	spin_lock_irqsave(&report_filterlist_lock, flags);
>+retry_alloc:
>+	/*
>+	 * Check if we need an allocation, and re-validate under the lock. Since
>+	 * report_filterlist_lock is a raw spinlock, we cannot allocate under it.
>+	 */
>+	if (data_race(report_filterlist.used == report_filterlist.size)) {
>+		new_size = (report_filterlist.size ?: 4) * 2;
>+		delay_free = new_addrs = kmalloc_array(new_size, sizeof(unsigned long), GFP_KERNEL);
>+		if (!new_addrs)
>+			return -ENOMEM;
>+	}
> 
>-	if (report_filterlist.addrs == NULL) {
>-		/* initial allocation */
>-		report_filterlist.addrs =
>-			kmalloc_array(report_filterlist.size,
>-				      sizeof(unsigned long), GFP_ATOMIC);
>-		if (report_filterlist.addrs == NULL) {
>-			ret = -ENOMEM;
>-			goto out;
>-		}
>-	} else if (report_filterlist.used == report_filterlist.size) {
>-		/* resize filterlist */
>-		size_t new_size = report_filterlist.size * 2;
>-		unsigned long *new_addrs =
>-			krealloc(report_filterlist.addrs,
>-				 new_size * sizeof(unsigned long), GFP_ATOMIC);
>-
>-		if (new_addrs == NULL) {
>-			/* leave filterlist itself untouched */
>-			ret = -ENOMEM;
>-			goto out;
>+	raw_spin_lock_irqsave(&report_filterlist_lock, flags);
>+	if (report_filterlist.used == report_filterlist.size) {
>+		/* Check we pre-allocated enough, and retry if not. */
>+		if (report_filterlist.used >= new_size) {
>+			raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
>+			kfree(new_addrs); /* kfree(NULL) is safe */
>+			delay_free = new_addrs = NULL;
>+			goto retry_alloc;
> 		}
> 
>+		if (report_filterlist.used)
>+			memcpy(new_addrs, report_filterlist.addrs, report_filterlist.used * sizeof(unsigned long));
>+		delay_free = report_filterlist.addrs; /* free the old list */
>+		report_filterlist.addrs = new_addrs;  /* switch to the new list */
> 		report_filterlist.size = new_size;
>-		report_filterlist.addrs = new_addrs;
> 	}
> 
> 	/* Note: deduplicating should be done in userspace. */
>@@ -185,8 +183,10 @@ static ssize_t insert_report_filterlist(const char *func)
> 		kallsyms_lookup_name(func);
> 	report_filterlist.sorted = false;
> 
>-out:
>-	spin_unlock_irqrestore(&report_filterlist_lock, flags);
>+	raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
>+
>+	if (delay_free)
>+		kfree(delay_free);
> 
> 	return ret;
> }
>@@ -204,13 +204,13 @@ static int show_info(struct seq_file *file, void *v)
> 	}
> 
> 	/* show filter functions, and filter type */
>-	spin_lock_irqsave(&report_filterlist_lock, flags);
>+	raw_spin_lock_irqsave(&report_filterlist_lock, flags);
> 	seq_printf(file, "\n%s functions: %s\n",
> 		   report_filterlist.whitelist ? "whitelisted" : "blacklisted",
> 		   report_filterlist.used == 0 ? "none" : "");
> 	for (i = 0; i < report_filterlist.used; ++i)
> 		seq_printf(file, " %ps\n", (void *)report_filterlist.addrs[i]);
>-	spin_unlock_irqrestore(&report_filterlist_lock, flags);
>+	raw_spin_unlock_irqrestore(&report_filterlist_lock, flags);
> 
> 	return 0;
> }
>-- 
>2.46.1.824.gd892dcdcdd-goog
>

