Message-ID: <vbfr2326la0.fsf@mellanox.com>
Date: Thu, 24 Oct 2019 18:39:07 +0000
From: Vlad Buslov <vladbu@...lanox.com>
To: John Hurley <john.hurley@...ronome.com>
CC: Vlad Buslov <vladbu@...lanox.com>, Jiri Pirko <jiri@...lanox.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"simon.horman@...ronome.com" <simon.horman@...ronome.com>,
"jakub.kicinski@...ronome.com" <jakub.kicinski@...ronome.com>,
"louis.peens@...ronome.com" <louis.peens@...ronome.com>,
"oss-drivers@...ronome.com" <oss-drivers@...ronome.com>
Subject: Re: [RFC net-next 1/1] net: sched: prevent duplicate flower rules
from tcf_proto destroy race
On Thu 24 Oct 2019 at 14:01, John Hurley <john.hurley@...ronome.com> wrote:
> When a new filter is added to cls_api, the function
> tcf_chain_tp_insert_unique() looks up the protocol/priority/chain to
> determine if the tcf_proto is duplicated in the chain's hashtable. It then
> creates a new entry or continues with an existing one. In cls_flower, this
> allows the function fl_ht_insert_unique to determine if a filter is a
> duplicate and reject appropriately, meaning that the duplicate will not be
> passed to drivers via the offload hooks. However, when a tcf_proto is
> destroyed it is removed from its chain before a hardware remove hook is
> hit. This can lead to a race whereby the driver has not received the
> remove message but duplicate flows can be accepted. This, in turn, can
> lead to the offload driver receiving incorrect duplicate flows and out of
> order add/delete messages.
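
Just to confirm I'm reading the race the same way, this is roughly the
interleaving I have in mind (my reading of the code paths, not an actual
trace):

    CPU0 (delete/flush path)          CPU1 (add path)
    ------------------------          ---------------
    tcf_chain_tp_remove()
      tp unlinked from chain
                                      tcf_chain_tp_insert_unique()
                                        no matching tp found,
                                        new tp created
                                      fl_change()
                                        fl_ht_insert_unique() succeeds
                                        hw add offloaded -> duplicate
                                        rule in the driver
    tp->ops->destroy() (fl_destroy)
      hw remove offloaded only now,
      after the duplicate add
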
>
> Prevent duplicates by utilising an approach suggested by Vlad Buslov. A
> hash table per block stores each unique chain/protocol/prio being
> destroyed. This entry is only removed when the full destroy (and hardware
> offload) has completed. If a new flow is being added with the same
> identifiers as a tcf_proto being destroyed, then the add request is replayed
> until the destroy is complete.
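
(Side note for context, paraphrasing tc_new_tfilter() from memory rather
than quoting it: the replay works because the add path already retries on
-EAGAIN, along the lines of

	replay:
		...
		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol,
						prio, rtnl_held);
		...
		if (err == -EAGAIN)
			goto replay;	/* re-run the whole add request */

so returning -EAGAIN from tcf_chain_tp_insert_unique() below should be
enough to keep the request spinning until the destroy has finished.)
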
>
> Fixes: 8b64678e0af8 ("net: sched: refactor tp insert/delete for concurrent execution")
> Signed-off-by: John Hurley <john.hurley@...ronome.com>
> Reviewed-by: Simon Horman <simon.horman@...ronome.com>
> Reported-by: Louis Peens <louis.peens@...ronome.com>
> ---
Hi John,
Thanks again for doing this!
> include/net/sch_generic.h | 3 ++
> net/sched/cls_api.c | 108 ++++++++++++++++++++++++++++++++++++++++++++--
> 2 files changed, 107 insertions(+), 4 deletions(-)
>
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 637548d..363d2de 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -414,6 +414,9 @@ struct tcf_block {
> struct list_head filter_chain_list;
> } chain0;
> struct rcu_head rcu;
> + struct rhashtable proto_destroy_ht;
> + struct rhashtable_params proto_destroy_ht_params;
> + struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
> };
>
> #ifdef CONFIG_PROVE_LOCKING
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index 8717c0b..7f7095a 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -47,6 +47,77 @@ static LIST_HEAD(tcf_proto_base);
> /* Protects list of registered TC modules. It is pure SMP lock. */
> static DEFINE_RWLOCK(cls_mod_lock);
>
> +struct tcf_destroy_proto {
> + struct destroy_key {
> + u32 chain_index;
> + u32 prio;
> + __be16 protocol;
> + } key;
> + struct rhash_head ht_node;
> +};
> +
> +static const struct rhashtable_params destroy_ht_params = {
> + .key_offset = offsetof(struct tcf_destroy_proto, key),
> + .key_len = offsetofend(struct destroy_key, protocol),
> + .head_offset = offsetof(struct tcf_destroy_proto, ht_node),
> + .automatic_shrinking = true,
> +};
> +
> +static void
> +tcf_proto_signal_destroying(struct tcf_chain *chain, struct tcf_proto *tp)
> +{
> + struct tcf_block *block = chain->block;
> + struct tcf_destroy_proto *entry;
> +
> + entry = kzalloc(sizeof(*entry), GFP_KERNEL);
Did you consider putting the tcf_proto instance itself into
block->proto_destroy_ht? That might simplify the code, since the entry is
destroyed together with the tcf_proto by tcf_proto_destroy() anyway. If
such an approach is possible (I might be missing some obvious corner
cases), it might also be a good idea to use the good old intrusive hash
table from hashtable.h, which would let the whole solution avoid dynamic
memory allocation altogether.
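
Something like the following is what I have in mind - a completely
untested sketch, with the new struct members and the hash helper invented
for illustration (needs <linux/hashtable.h> and <linux/jhash.h>):

	/* hypothetical new member in struct tcf_proto, INIT_HLIST_NODE()'d
	 * in tcf_proto_create() so that hash_del() below is a no-op for a
	 * tp that was never signalled
	 */
	struct hlist_node	destroy_ht_node;

	/* hypothetical replacement for the rhashtable in struct tcf_block,
	 * 2^7 = 128 buckets, hash_init()'d in tcf_block_create()
	 */
	DECLARE_HASHTABLE(proto_destroy_ht, 7);

	static u32 tcf_proto_destroy_hash(u32 chain_index, u32 prio,
					  __be16 protocol)
	{
		return jhash_3words(chain_index, prio,
				    (__force u32)protocol, 0);
	}

	static void tcf_proto_signal_destroying(struct tcf_chain *chain,
						struct tcf_proto *tp)
	{
		struct tcf_block *block = chain->block;
		u32 h = tcf_proto_destroy_hash(chain->index, tp->prio,
					       tp->protocol);

		mutex_lock(&block->proto_destroy_lock);
		/* node lives in tp itself, so no allocation and no error
		 * path here
		 */
		hash_add(block->proto_destroy_ht, &tp->destroy_ht_node, h);
		mutex_unlock(&block->proto_destroy_lock);
	}

	static void tcf_proto_signal_destroyed(struct tcf_chain *chain,
					       struct tcf_proto *tp)
	{
		struct tcf_block *block = chain->block;

		mutex_lock(&block->proto_destroy_lock);
		hash_del(&tp->destroy_ht_node);
		mutex_unlock(&block->proto_destroy_lock);
	}

The lookup on the insert path would then hash_for_each_possible() over
the bucket and compare chain index/prio/protocol explicitly.
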
> + if (!entry)
> + return;
> +
> + entry->key.chain_index = chain->index;
> + entry->key.prio = tp->prio;
> + entry->key.protocol = tp->protocol;
> +
> + mutex_lock(&block->proto_destroy_lock);
> + /* If key already exists or lookup errors out then free new entry. */
> + if (rhashtable_lookup_get_insert_fast(&block->proto_destroy_ht,
> + &entry->ht_node,
> + block->proto_destroy_ht_params))
> + kfree(entry);
> + mutex_unlock(&block->proto_destroy_lock);
> +}
> +
> +static struct tcf_destroy_proto *
> +tcf_proto_lookup_destroying(struct tcf_block *block, u32 chain_idx, u32 prio,
> + __be16 proto)
> +{
> + struct destroy_key key;
> +
> + key.chain_index = chain_idx;
> + key.prio = prio;
> + key.protocol = proto;
> +
> + return rhashtable_lookup_fast(&block->proto_destroy_ht, &key,
> + block->proto_destroy_ht_params);
> +}
> +
> +static void
> +tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
> +{
> + struct tcf_block *block = chain->block;
> + struct tcf_destroy_proto *entry;
> +
> + mutex_lock(&block->proto_destroy_lock);
> + entry = tcf_proto_lookup_destroying(block, chain->index, tp->prio,
> + tp->protocol);
> + if (entry) {
> + rhashtable_remove_fast(&block->proto_destroy_ht,
> + &entry->ht_node,
> + block->proto_destroy_ht_params);
> + kfree(entry);
> + }
> + mutex_unlock(&block->proto_destroy_lock);
> +}
> +
> /* Find classifier type by string name */
>
> static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
> @@ -234,9 +305,11 @@ static void tcf_proto_get(struct tcf_proto *tp)
> static void tcf_chain_put(struct tcf_chain *chain);
>
> static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
> - struct netlink_ext_ack *extack)
> + bool sig_destroy, struct netlink_ext_ack *extack)
> {
> tp->ops->destroy(tp, rtnl_held, extack);
> + if (sig_destroy)
> + tcf_proto_signal_destroyed(tp->chain, tp);
> tcf_chain_put(tp->chain);
> module_put(tp->ops->owner);
> kfree_rcu(tp, rcu);
> @@ -246,7 +319,7 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
> struct netlink_ext_ack *extack)
> {
> if (refcount_dec_and_test(&tp->refcnt))
> - tcf_proto_destroy(tp, rtnl_held, extack);
> + tcf_proto_destroy(tp, rtnl_held, true, extack);
> }
>
> static int walker_check_empty(struct tcf_proto *tp, void *fh,
> @@ -370,6 +443,8 @@ static bool tcf_chain_detach(struct tcf_chain *chain)
> static void tcf_block_destroy(struct tcf_block *block)
> {
> mutex_destroy(&block->lock);
> + rhashtable_destroy(&block->proto_destroy_ht);
> + mutex_destroy(&block->proto_destroy_lock);
> kfree_rcu(block, rcu);
> }
>
> @@ -545,6 +620,12 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
>
> mutex_lock(&chain->filter_chain_lock);
> tp = tcf_chain_dereference(chain->filter_chain, chain);
> + while (tp) {
> + tp_next = rcu_dereference_protected(tp->next, 1);
> + tcf_proto_signal_destroying(chain, tp);
> + tp = tp_next;
> + }
> + tp = tcf_chain_dereference(chain->filter_chain, chain);
> RCU_INIT_POINTER(chain->filter_chain, NULL);
> tcf_chain0_head_change(chain, NULL);
> chain->flushing = true;
> @@ -857,6 +938,16 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
> /* Don't store q pointer for blocks which are shared */
> if (!tcf_block_shared(block))
> block->q = q;
> +
> + block->proto_destroy_ht_params = destroy_ht_params;
> + if (rhashtable_init(&block->proto_destroy_ht,
> + &block->proto_destroy_ht_params)) {
> + NL_SET_ERR_MSG(extack, "Failed to initialise block proto destroy hashtable");
> + kfree(block);
> + return ERR_PTR(-ENOMEM);
> + }
> + mutex_init(&block->proto_destroy_lock);
> +
> return block;
> }
>
> @@ -1621,6 +1712,13 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
>
> mutex_lock(&chain->filter_chain_lock);
>
> + if (tcf_proto_lookup_destroying(chain->block, chain->index, prio,
> + protocol)) {
Earlier in the patch, tcf_proto_lookup_destroying() is called under
block->proto_destroy_lock. I assume the lock is also needed here.
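
I.e. something along these lines (untested):

	bool being_destroyed;

	mutex_lock(&chain->block->proto_destroy_lock);
	being_destroyed = tcf_proto_lookup_destroying(chain->block,
						      chain->index, prio,
						      protocol) != NULL;
	mutex_unlock(&chain->block->proto_destroy_lock);
	if (being_destroyed) {
		mutex_unlock(&chain->filter_chain_lock);
		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
		return ERR_PTR(-EAGAIN);
	}

Lock ordering looks fine to me since tcf_chain_flush() in this patch
already takes proto_destroy_lock while holding filter_chain_lock.
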
> + mutex_unlock(&chain->filter_chain_lock);
> + tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
> + return ERR_PTR(-EAGAIN);
> + }
> +
> tp = tcf_chain_tp_find(chain, &chain_info,
> protocol, prio, false);
> if (!tp)
> @@ -1628,10 +1726,10 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
> mutex_unlock(&chain->filter_chain_lock);
>
> if (tp) {
> - tcf_proto_destroy(tp_new, rtnl_held, NULL);
> + tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
> tp_new = tp;
> } else if (err) {
> - tcf_proto_destroy(tp_new, rtnl_held, NULL);
> + tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
> tp_new = ERR_PTR(err);
> }
>
> @@ -1669,6 +1767,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
> return;
> }
>
> + tcf_proto_signal_destroying(chain, tp);
> next = tcf_chain_dereference(chain_info.next, chain);
> if (tp == chain->filter_chain)
> tcf_chain0_head_change(chain, next);
> @@ -2188,6 +2287,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
> err = -EINVAL;
> goto errout_locked;
> } else if (t->tcm_handle == 0) {
> + tcf_proto_signal_destroying(chain, tp);
> tcf_chain_tp_remove(chain, &chain_info, tp);
> mutex_unlock(&chain->filter_chain_lock);