netdev - Re: [PATCH net-next 1/3] bpf: Implement map_delete_elem for BPF_MAP_TYPE_LPM

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite for Android: free password hash cracker in your pocket

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <3e537b56-d0f2-de9a-5bb1-f60fbfc11ca5@fb.com>
Date:   Mon, 18 Sep 2017 15:53:23 -0700
From:   Alexei Starovoitov <ast@...com>
To:     Craig Gallek <kraigatgoog@...il.com>,
        Daniel Mack <daniel@...que.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        "David S . Miller" <davem@...emloft.net>
CC:     <netdev@...r.kernel.org>
Subject: Re: [PATCH net-next 1/3] bpf: Implement map_delete_elem for
 BPF_MAP_TYPE_LPM_TRIE

On 9/18/17 12:30 PM, Craig Gallek wrote:
> From: Craig Gallek <kraig@...gle.com>
>
> This is a simple non-recursive delete operation.  It prunes paths
> of empty nodes in the tree, but it does not try to further compress
> the tree as nodes are removed.
>
> Signed-off-by: Craig Gallek <kraig@...gle.com>
> ---
>  kernel/bpf/lpm_trie.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 77 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
> index 1b767844a76f..9d58a576b2ae 100644
> --- a/kernel/bpf/lpm_trie.c
> +++ b/kernel/bpf/lpm_trie.c
> @@ -389,10 +389,84 @@ static int trie_update_elem(struct bpf_map *map,
>  	return ret;
>  }
>
> -static int trie_delete_elem(struct bpf_map *map, void *key)
> +/* Called from syscall or from eBPF program */
> +static int trie_delete_elem(struct bpf_map *map, void *_key)
>  {
> -	/* TODO */
> -	return -ENOSYS;
> +	struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
> +	struct bpf_lpm_trie_key *key = _key;
> +	struct lpm_trie_node __rcu **trim;
> +	struct lpm_trie_node *node;
> +	unsigned long irq_flags;
> +	unsigned int next_bit;
> +	size_t matchlen = 0;
> +	int ret = 0;
> +
> +	if (key->prefixlen > trie->max_prefixlen)
> +		return -EINVAL;
> +
> +	raw_spin_lock_irqsave(&trie->lock, irq_flags);
> +
> +	/* Walk the tree looking for an exact key/length match and keeping
> +	 * track of where we could begin trimming the tree.  The trim-point
> +	 * is the sub-tree along the walk consisting of only single-child
> +	 * intermediate nodes and ending at a leaf node that we want to
> +	 * remove.
> +	 */
> +	trim = &trie->root;
> +	node = rcu_dereference_protected(
> +		trie->root, lockdep_is_held(&trie->lock));
> +	while (node) {
> +		matchlen = longest_prefix_match(trie, node, key);
> +
> +		if (node->prefixlen != matchlen ||
> +		    node->prefixlen == key->prefixlen)
> +			break;

curious why there is no need to do
'node->prefixlen == trie->max_prefixlen' in the above
like update/lookup do?

> +
> +		next_bit = extract_bit(key->data, node->prefixlen);
> +		/* If we hit a node that has more than one child or is a valid
> +		 * prefix itself, do not remove it. Reset the root of the trim
> +		 * path to its descendant on our path.
> +		 */
> +		if (!(node->flags & LPM_TREE_NODE_FLAG_IM) ||
> +		    (node->child[0] && node->child[1]))
> +			trim = &node->child[next_bit];
> +		node = rcu_dereference_protected(
> +			node->child[next_bit], lockdep_is_held(&trie->lock));
> +	}
> +
> +	if (!node || node->prefixlen != key->prefixlen ||
> +	    (node->flags & LPM_TREE_NODE_FLAG_IM)) {
> +		ret = -ENOENT;
> +		goto out;
> +	}
> +
> +	trie->n_entries--;
> +
> +	/* If the node we are removing is not a leaf node, simply mark it
> +	 * as intermediate and we are done.
> +	 */
> +	if (rcu_access_pointer(node->child[0]) ||
> +	    rcu_access_pointer(node->child[1])) {
> +		node->flags |= LPM_TREE_NODE_FLAG_IM;
> +		goto out;
> +	}
> +
> +	/* trim should now point to the slot holding the start of a path from
> +	 * zero or more intermediate nodes to our leaf node for deletion.
> +	 */
> +	while ((node = rcu_dereference_protected(
> +			*trim, lockdep_is_held(&trie->lock)))) {
> +		RCU_INIT_POINTER(*trim, NULL);
> +		trim = rcu_access_pointer(node->child[0]) ?
> +			&node->child[0] :
> +			&node->child[1];
> +		kfree_rcu(node, rcu);

can it be that some of the nodes this loop walks have
both child[0] and [1] ?