netdev - Re: [PATCH 3/3] bpf: hash: use per-bucket spinlock

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5680FD21.30108@iogearbox.net>
Date:	Mon, 28 Dec 2015 10:13:05 +0100
From:	Daniel Borkmann <daniel@...earbox.net>
To:	Ming Lei <tom.leiming@...il.com>, linux-kernel@...r.kernel.org,
	Alexei Starovoitov <ast@...nel.org>
CC:	"David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
	Ming Lei <ming.lei@...onical.com>
Subject: Re: [PATCH 3/3] bpf: hash: use per-bucket spinlock

On 12/26/2015 10:31 AM, Ming Lei wrote:
> From: Ming Lei <ming.lei@...onical.com>
>
> Both htab_map_update_elem() and htab_map_delete_elem() can be
> called from eBPF program, and they may be in kernel hot path,
> so it isn't efficient to use a per-hashtable lock in this two
> helpers.
>
> The per-hashtable spinlock is used just for protecting bucket's
> hlist, and per-bucket lock should be enough. This patch converts
> the per-hashtable lock into per-bucket spinlock, so that contention
> can be decreased a lot.
>
> Signed-off-by: Ming Lei <ming.lei@...onical.com>
> ---
>   kernel/bpf/hashtab.c | 35 +++++++++++++++++++++++++----------
>   1 file changed, 25 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index d857fcb..66bad7a 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -14,9 +14,14 @@
>   #include <linux/filter.h>
>   #include <linux/vmalloc.h>
>
> +struct bucket {
> +	struct hlist_head head;
> +	raw_spinlock_t lock;
> +};
> +
>   struct bpf_htab {
>   	struct bpf_map map;
> -	struct hlist_head *buckets;
> +	struct bucket *buckets;
>   	raw_spinlock_t lock;

Shouldn't the per-hashtable lock member be removed from struct bpf_htab here, now that
each bucket carries its own lock?

>   	atomic_t count;	/* number of elements in this hashtable */
>   	u32 n_buckets;	/* number of hash buckets */
> @@ -88,24 +93,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
>   		/* make sure page count doesn't overflow */
>   		goto free_htab;
>
> -	htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
> +	htab->map.pages = round_up(htab->n_buckets * sizeof(struct bucket) +
>   				   htab->elem_size * htab->map.max_entries,
>   				   PAGE_SIZE) >> PAGE_SHIFT;
>
>   	err = -ENOMEM;
> -	htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head),
> +	htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket),
>   				      GFP_USER | __GFP_NOWARN);
>
>   	if (!htab->buckets) {
> -		htab->buckets = vmalloc(htab->n_buckets * sizeof(struct hlist_head));
> +		htab->buckets = vmalloc(htab->n_buckets * sizeof(struct bucket));
>   		if (!htab->buckets)
>   			goto free_htab;
>   	}
>
> -	for (i = 0; i < htab->n_buckets; i++)
> -		INIT_HLIST_HEAD(&htab->buckets[i]);
> +	for (i = 0; i < htab->n_buckets; i++) {
> +		INIT_HLIST_HEAD(&htab->buckets[i].head);
> +		raw_spin_lock_init(&htab->buckets[i].lock);
> +	}
>
> -	raw_spin_lock_init(&htab->lock);
>   	atomic_set(&htab->count, 0);
>
>   	return &htab->map;
> @@ -120,11 +126,16 @@ static inline u32 htab_map_hash(const void *key, u32 key_len)
>   	return jhash(key, key_len, 0);
>   }
>
> -static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
> +static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
>   {
>   	return &htab->buckets[hash & (htab->n_buckets - 1)];
>   }
>
> +static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
> +{
> +	return &__select_bucket(htab, hash)->head;
> +}
> +
>   static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
>   					 void *key, u32 key_size)
>   {
> @@ -227,6 +238,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
>   	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
>   	struct htab_elem *l_new, *l_old;
>   	struct hlist_head *head;
> +	struct bucket *b;
>   	unsigned long flags;
>   	u32 key_size;
>   	int ret;
> @@ -248,7 +260,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
>   	memcpy(l_new->key + round_up(key_size, 8), value, map->value_size);
>
>   	l_new->hash = htab_map_hash(l_new->key, key_size);
> -	head = select_bucket(htab, l_new->hash);
> +	b = __select_bucket(htab, l_new->hash);
> +	head = &b->head;
>
>   	/* bpf_map_update_elem() can be called in_irq() */
>   	raw_spin_lock_irqsave(&htab->lock, flags);

I am a bit confused by this one, though.

The raw spinlock taken here is still the per-hashtable one (htab->lock); it has not been
converted to the per-bucket lock in this patch, contrary to what the commit message says.
So the patch seems to go in the right direction, but looks incomplete. Shouldn't the
code above take b->lock instead, e.g. raw_spin_lock_irqsave(&b->lock, flags), and
likewise at the matching unlock?

> @@ -299,6 +312,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
>   {
>   	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
>   	struct hlist_head *head;
> +	struct bucket *b;
>   	struct htab_elem *l;
>   	unsigned long flags;
>   	u32 hash, key_size;
> @@ -309,7 +323,8 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
>   	key_size = map->key_size;
>
>   	hash = htab_map_hash(key, key_size);
> -	head = select_bucket(htab, hash);
> +	b = __select_bucket(htab, hash);
> +	head = &b->head;
>
>   	raw_spin_lock_irqsave(&htab->lock, flags);
>

Same here: shouldn't the delete path take the per-bucket b->lock rather than htab->lock?

Thanks,
Daniel
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html