lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20251112132950.60a165f9@gandalf.local.home>
Date: Wed, 12 Nov 2025 13:29:50 -0500
From: Steven Rostedt <rostedt@...dmis.org>
To: Yongliang Gao <leonylgao@...il.com>
Cc: mhiramat@...nel.org, mathieu.desnoyers@...icios.com,
 bigeasy@...utronix.de, linux-kernel@...r.kernel.org,
 linux-trace-kernel@...r.kernel.org, frankjpliu@...cent.com, Yongliang Gao
 <leonylgao@...cent.com>, Huang Cun <cunhuang@...cent.com>
Subject: Re: [PATCH v2] trace/pid_list: optimize pid_list->lock contention

On Thu, 13 Nov 2025 02:14:56 +0800
Yongliang Gao <leonylgao@...il.com> wrote:

> @@ -271,7 +277,6 @@ int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid,
>  {
>  	union upper_chunk *upper_chunk;
>  	union lower_chunk *lower_chunk;
> -	unsigned long flags;
>  	unsigned int upper1;
>  	unsigned int upper2;
>  	unsigned int lower;
> @@ -282,27 +287,44 @@ int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid,
>  	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
>  		return -EINVAL;
>  
> -	raw_spin_lock_irqsave(&pid_list->lock, flags);
> -	for (; upper1 <= UPPER_MASK; upper1++, upper2 = 0) {
> -		upper_chunk = pid_list->upper[upper1];
> +	do {
> +		unsigned int start_upper1 = upper1;
> +		unsigned int start_upper2 = upper2;
> +		unsigned int start_lower = lower;
> +		unsigned int seq;
>  
> -		if (!upper_chunk)
> -			continue;
> +		seq = read_seqcount_begin(&pid_list->seqcount);
>  
> -		for (; upper2 <= UPPER_MASK; upper2++, lower = 0) {
> -			lower_chunk = upper_chunk->data[upper2];
> -			if (!lower_chunk)
> +		for (; upper1 <= UPPER_MASK; upper1++, upper2 = 0) {
> +			upper_chunk = pid_list->upper[upper1];
> +
> +			if (!upper_chunk)
>  				continue;
>  
> -			lower = find_next_bit(lower_chunk->data, LOWER_MAX,
> -					    lower);
> -			if (lower < LOWER_MAX)
> -				goto found;
> +			for (; upper2 <= UPPER_MASK; upper2++, lower = 0) {
> +				lower_chunk = upper_chunk->data[upper2];
> +				if (!lower_chunk)
> +					continue;
> +
> +				lower = find_next_bit(lower_chunk->data, LOWER_MAX,
> +						    lower);
> +				if (lower < LOWER_MAX)
> +					goto found;
> +			}
>  		}
> -	}
>  
> +		upper1 = UPPER_MASK + 1; /* Mark as not found */
>   found:
> -	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
> +		if (read_seqcount_retry(&pid_list->seqcount, seq)) {
> +			/* Retry if write happened during read */
> +			upper1 = start_upper1;
> +			upper2 = start_upper2;
> +			lower = start_lower;
> +			continue;
> +		}
> +		break;
> +	} while (1);
> +
>  	if (upper1 > UPPER_MASK)
>  		return -1;

Honestly, I wouldn't modify trace_pid_list_next(). It is simply used for
printing of the pids in /sys/kernel/tracing/set_*_pid files. It's not a
critical section, and it shouldn't affect scheduling like
trace_pid_list_is_set() does.

Just keep it taking the raw spin lock, as that will keep it from being
modified by the writers. It also keeps the code from becoming more complex.

-- Steve


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ