Date:   Thu, 25 Apr 2019 21:32:47 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Yuyang Du <duyuyang@...il.com>
Cc:     will.deacon@....com, mingo@...nel.org, bvanassche@....org,
        ming.lei@...hat.com, frederic@...nel.org, tglx@...utronix.de,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH 19/28] locking/lockdep: Optimize irq usage check when
 marking lock usage bit

On Wed, Apr 24, 2019 at 06:19:25PM +0800, Yuyang Du wrote:

After only a quick read of these next patches, this is the one that
worries me most.

You did mention Frederic's patches, but I'm not entirely sure you're
aware why he's doing them. He's preparing to split the softirq state
into one state per softirq vector.

See here:

  https://lkml.kernel.org/r/20190228171242.32144-14-frederic@kernel.org
  https://lkml.kernel.org/r/20190228171242.32144-15-frederic@kernel.org

IOW he's going to massively explode this storage.
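
For reference, kernel/locking/lockdep_states.h currently lists just two
states:

  LOCKDEP_STATE(HARDIRQ)
  LOCKDEP_STATE(SOFTIRQ)

After Frederic's split, that SOFTIRQ entry becomes one state per softirq
vector; roughly (a sketch, assuming the vector names from
include/linux/interrupt.h):

  LOCKDEP_STATE(HARDIRQ)
  LOCKDEP_STATE(HI_SOFTIRQ)
  LOCKDEP_STATE(TIMER_SOFTIRQ)
  LOCKDEP_STATE(NET_TX_SOFTIRQ)
  LOCKDEP_STATE(NET_RX_SOFTIRQ)
  LOCKDEP_STATE(BLOCK_SOFTIRQ)
  LOCKDEP_STATE(IRQ_POLL_SOFTIRQ)
  LOCKDEP_STATE(TASKLET_SOFTIRQ)
  LOCKDEP_STATE(SCHED_SOFTIRQ)
  LOCKDEP_STATE(HRTIMER_SOFTIRQ)
  LOCKDEP_STATE(RCU_SOFTIRQ)

That is 1 hardirq + 10 softirq states instead of today's 2.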


> diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
> index 7c2fefa..0e209b8 100644
> --- a/include/linux/lockdep.h
> +++ b/include/linux/lockdep.h
> @@ -72,6 +72,11 @@ struct lock_trace {
>  };
>  
>  #define LOCKSTAT_POINTS		4
> +/*
> + * For hardirq and softirq, each has one for irqsafe lock that reaches
> + * this lock and one for irqsafe-read lock that reaches this lock.
> + */
> +#define LOCK_IRQ_SAFE_LOCKS	4

This is going to be 2*(1+10) if I counted correctly: a safe and a
safe-read slot for the one hardirq state plus each of the ten softirq
vectors.

>  /*
>   * The lock-class itself. The order of the structure members matters.
> @@ -114,6 +119,15 @@ struct lock_class {
>  	int				name_version;
>  	const char			*name;
>  
> +	/*
> +	 * irqsafe_lock indicates whether there is an IRQ safe lock
> +	 * reaches this lock_class in the dependency graph, and if so
> +	 * points to it; irqsafe_distance measures the distance from the
> +	 * IRQ safe locks, used for keeping the shortest.
> +	 */
> +	struct lock_class		*irqsafe_lock[LOCK_IRQ_SAFE_LOCKS];
> +	int				irqsafe_distance[LOCK_IRQ_SAFE_LOCKS];

That makes for 264 additional bytes (22 pointers plus 22 ints) in
struct lock_class, which is immense given that we're talking about 8192
instances of it, for a total of over 2M of data.
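
A quick back-of-the-envelope check of those numbers (a standalone
sketch, not kernel code; assumes 64-bit pointers and a 4-byte int):

  #include <stdio.h>

  /* 2 * (1 hardirq + 10 softirq vectors) slots per lock_class */
  #define LOCK_IRQ_SAFE_LOCKS	22
  #define MAX_LOCKDEP_KEYS	8192UL

  int main(void)
  {
  	/* one lock_class pointer plus one distance int per slot */
  	unsigned long per_class =
  		LOCK_IRQ_SAFE_LOCKS * (sizeof(void *) + sizeof(int));

  	printf("per lock_class: %lu bytes\n", per_class);	/* 264 */
  	printf("total: %lu KiB\n",
  	       per_class * MAX_LOCKDEP_KEYS / 1024);		/* 2112 KiB */
  	return 0;
  }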

> +
>  #ifdef CONFIG_LOCK_STAT
>  	unsigned long			contention_point[LOCKSTAT_POINTS];
>  	unsigned long			contending_point[LOCKSTAT_POINTS];
> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
> index 6e79bd6..a3138c9 100644
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -1101,6 +1101,7 @@ static bool is_dynamic_key(const struct lock_class_key *key)
>  	struct lockdep_subclass_key *key;
>  	struct hlist_head *hash_head;
>  	struct lock_class *class;
> +	int i;
>  
>  	DEBUG_LOCKS_WARN_ON(!irqs_disabled());
>  
> @@ -1153,6 +1154,9 @@ static bool is_dynamic_key(const struct lock_class_key *key)
>  	WARN_ON_ONCE(!list_empty(&class->locks_before));
>  	WARN_ON_ONCE(!list_empty(&class->locks_after));
>  	class->name_version = count_matching_names(class);
> +	for (i = 0; i < ARRAY_SIZE(class->irqsafe_distance); i++)
> +		class->irqsafe_distance[i] = INT_MAX;
> +
>  	/*
>  	 * We use RCU's safe list-add method to make
>  	 * parallel walking of the hash-list safe:
> @@ -2896,6 +2900,10 @@ static void print_usage_bug_scenario(struct held_lock *lock)
>  	return 1;
>  }
>  
> +static DECLARE_BITMAP(lock_classes_hardirq_safe, MAX_LOCKDEP_KEYS);
> +static DECLARE_BITMAP(lock_classes_hardirq_safe_read, MAX_LOCKDEP_KEYS);
> +static DECLARE_BITMAP(lock_classes_softirq_safe, MAX_LOCKDEP_KEYS);
> +static DECLARE_BITMAP(lock_classes_softirq_safe_read, MAX_LOCKDEP_KEYS);

That will need to be written like:

#define LOCKDEP_STATE(__STATE)								\
	static DECLARE_BITMAP(lock_classes_##__STATE##_safe, MAX_LOCKDEP_KEYS);		\
	static DECLARE_BITMAP(lock_classes_##__STATE##_safe_read, MAX_LOCKDEP_KEYS);
#include "lockdep_states.h"
#undef LOCKDEP_STATE

And will thereby generate 22 bitmaps, each taking 1KB of storage.
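
Spelled out: each LOCKDEP_STATE() entry in lockdep_states.h expands to
a pair of bitmaps, e.g. for one state (preprocessor output sketch; the
exact token case follows the entries in that header):

  static DECLARE_BITMAP(lock_classes_HARDIRQ_safe, MAX_LOCKDEP_KEYS);
  static DECLARE_BITMAP(lock_classes_HARDIRQ_safe_read, MAX_LOCKDEP_KEYS);

With MAX_LOCKDEP_KEYS = 8192, each bitmap is 8192 bits = 1KB, and 11
states (1 hardirq + 10 softirq vectors) times 2 gives the 22 bitmaps
and 22KB above.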

>  /*
>   * print irq inversion bug:
> @@ -5001,6 +5009,12 @@ void __init lockdep_init(void)
>  		+ sizeof(lock_chains)
>  		+ sizeof(lock_chains_in_use)
>  		+ sizeof(chain_hlocks)
> +#ifdef CONFIG_TRACE_IRQFLAGS
> +		+ sizeof(lock_classes_hardirq_safe)
> +		+ sizeof(lock_classes_hardirq_safe_read)
> +		+ sizeof(lock_classes_softirq_safe)
> +		+ sizeof(lock_classes_softirq_safe_read)

another macro generator here.
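
Presumably something like (a sketch, mirroring the bitmap generator
above):

  #define LOCKDEP_STATE(__STATE)					\
  		+ sizeof(lock_classes_##__STATE##_safe)			\
  		+ sizeof(lock_classes_##__STATE##_safe_read)
  #include "lockdep_states.h"
  #undef LOCKDEP_STATE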

> +#endif
>  #endif
>  		) / 1024
>  		);

Now, I would normally not care too much about memory costs for a debug
feature, except there are architectures that need to keep their image
size small; see the comment that goes along with CONFIG_LOCKDEP_SMALL
in lockdep_internals.h.

