Message-ID: <87r2217971.fsf@toke.dk>
Date: Thu, 21 Nov 2019 12:36:18 +0100
From: Toke Høiland-Jørgensen <toke@...hat.com>
To: Tetsuo Handa <penguin-kernel@...ove.sakura.ne.jp>
Cc: Alexei Starovoitov <alexei.starovoitov@...il.com>,
Jesper Dangaard Brouer <brouer@...hat.com>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
syzbot <syzbot+30209ea299c09d8785c9@...kaller.appspotmail.com>,
ddstreet@...e.org, Dmitry Vyukov <dvyukov@...gle.com>,
LKML <linux-kernel@...r.kernel.org>,
Network Development <netdev@...r.kernel.org>,
syzkaller-bugs <syzkaller-bugs@...glegroups.com>,
bpf <bpf@...r.kernel.org>, Yonghong Song <yhs@...com>
Subject: Re: unregister_netdevice: waiting for DEV to become free (2)

Tetsuo Handa <penguin-kernel@...ove.sakura.ne.jp> writes:
> Hello.
>
> syzbot is still reporting that bpf(BPF_MAP_UPDATE_ELEM) causes
> unregister_netdevice() to hang. It seems that commit 546ac1ffb70d25b5
> ("bpf: add devmap, a map for storing net device references") assigned
> dtab->netdev_map[i] at dev_map_update_elem() but commit 6f9d451ab1a33728
> ("xdp: Add devmap_hash map type for looking up devices by hashed index")
> forgot to assign dtab->netdev_map[idx] at __dev_map_hash_update_elem()
> when dev is newly allocated by __dev_map_alloc_node(). As far as I and
> syzbot tested, https://syzkaller.appspot.com/x/patch.diff?x=140dd206e00000
> can avoid the problem, but I don't know whether this is the right location to
> assign it. Please check and fix.

Hi Tetsuo

Sorry for missing this email last week :(

I think the issue is not a missing update of dtab->netdev_map (that is
not used at all for DEVMAP_HASH), but rather that dev_map_free() is not
cleaning up properly for DEVMAP_HASH types. Could you please check if
the patch below helps?
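
For reference, the reason dtab->netdev_map doesn't matter for the hash
type is that the DEVMAP_HASH lookup path only ever walks the
dev_index_head buckets; roughly like this (paraphrased from memory, so
the exact names may be slightly off):

struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct hlist_head *head = dev_map_index_hash(dtab, key);
	struct bpf_dtab_netdev *dev;

	/* entries live only in the hash buckets; dtab->netdev_map is
	 * never consulted for this map type
	 */
	hlist_for_each_entry_rcu(dev, head, index_hlist)
		if (dev->idx == key)
			return dev;

	return NULL;
}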
-Toke

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 3867864cdc2f..42ccfcb38424 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -74,7 +74,7 @@ struct bpf_dtab_netdev {
 
 struct bpf_dtab {
 	struct bpf_map map;
-	struct bpf_dtab_netdev **netdev_map;
+	struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */
 	struct list_head __percpu *flush_list;
 	struct list_head list;
 
@@ -101,6 +101,12 @@ static struct hlist_head *dev_map_create_hash(unsigned int entries)
 	return hash;
 }
 
+static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
+						    int idx)
+{
+	return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
+}
+
 static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 {
 	int err, cpu;
@@ -143,24 +149,22 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 	for_each_possible_cpu(cpu)
 		INIT_LIST_HEAD(per_cpu_ptr(dtab->flush_list, cpu));
 
-	dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
-					      sizeof(struct bpf_dtab_netdev *),
-					      dtab->map.numa_node);
-	if (!dtab->netdev_map)
-		goto free_percpu;
-
 	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
 		dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
 		if (!dtab->dev_index_head)
-			goto free_map_area;
+			goto free_percpu;
 
 		spin_lock_init(&dtab->index_lock);
+	} else {
+		dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
+						      sizeof(struct bpf_dtab_netdev *),
+						      dtab->map.numa_node);
+		if (!dtab->netdev_map)
+			goto free_percpu;
 	}
 
 	return 0;
 
-free_map_area:
-	bpf_map_area_free(dtab->netdev_map);
 free_percpu:
 	free_percpu(dtab->flush_list);
 free_charge:
@@ -228,21 +232,40 @@ static void dev_map_free(struct bpf_map *map)
 			cond_resched();
 	}
 
-	for (i = 0; i < dtab->map.max_entries; i++) {
-		struct bpf_dtab_netdev *dev;
+	if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+		for (i = 0; i < dtab->n_buckets; i++) {
+			struct bpf_dtab_netdev *dev;
+			struct hlist_head *head;
+			struct hlist_node *next;
 
-		dev = dtab->netdev_map[i];
-		if (!dev)
-			continue;
+			head = dev_map_index_hash(dtab, i);
 
-		free_percpu(dev->bulkq);
-		dev_put(dev->dev);
-		kfree(dev);
+			hlist_for_each_entry_safe(dev, next, head, index_hlist) {
+				hlist_del_rcu(&dev->index_hlist);
+				free_percpu(dev->bulkq);
+				dev_put(dev->dev);
+				kfree(dev);
+			}
+		}
+
+		kfree(dtab->dev_index_head);
+	} else {
+		for (i = 0; i < dtab->map.max_entries; i++) {
+			struct bpf_dtab_netdev *dev;
+
+			dev = dtab->netdev_map[i];
+			if (!dev)
+				continue;
+
+			free_percpu(dev->bulkq);
+			dev_put(dev->dev);
+			kfree(dev);
+		}
+
+		bpf_map_area_free(dtab->netdev_map);
 	}
 
 	free_percpu(dtab->flush_list);
-	bpf_map_area_free(dtab->netdev_map);
-	kfree(dtab->dev_index_head);
 	kfree(dtab);
 }
 
@@ -263,12 +286,6 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
-static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
-						    int idx)
-{
-	return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
-}
-
 struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
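
In case it is useful for testing, the path in question can be exercised
from userspace with something like the below (untested sketch, assumes
CAP_NET_ADMIN and a kernel with BPF_MAP_TYPE_DEVMAP_HASH): create a
devmap_hash map, insert one entry, then close the fd so dev_map_free()
runs. Without the cleanup above, the inserted entry keeps its reference
on the device, and a later unregister_netdevice() of that device (e.g.,
on netns teardown) will wait forever, which matches the syzbot report.

/* untested sketch - not the syzbot reproducer */
#include <linux/bpf.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr)
{
	return (int)syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int main(void)
{
	union bpf_attr attr;
	uint32_t key = 0, ifindex = 1;	/* 1 is normally loopback */
	int fd;

	/* BPF_MAP_CREATE: u32 key (hash index), u32 value (ifindex) */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_DEVMAP_HASH;
	attr.key_size = sizeof(uint32_t);
	attr.value_size = sizeof(uint32_t);
	attr.max_entries = 4;
	fd = sys_bpf(BPF_MAP_CREATE, &attr);
	if (fd < 0)
		return 1;

	/* BPF_MAP_UPDATE_ELEM: takes a reference on the target netdev */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = (uint64_t)(uintptr_t)&key;
	attr.value = (uint64_t)(uintptr_t)&ifindex;
	attr.flags = BPF_ANY;
	sys_bpf(BPF_MAP_UPDATE_ELEM, &attr);

	/* dropping the last reference ends up in dev_map_free() */
	close(fd);
	return 0;
}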