Message-ID: <vbfr2326la0.fsf@mellanox.com>
Date: Thu, 24 Oct 2019 18:39:07 +0000
From: Vlad Buslov <vladbu@...lanox.com>
To: John Hurley <john.hurley@...ronome.com>
CC: Vlad Buslov <vladbu@...lanox.com>, Jiri Pirko <jiri@...lanox.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"simon.horman@...ronome.com" <simon.horman@...ronome.com>,
"jakub.kicinski@...ronome.com" <jakub.kicinski@...ronome.com>,
"louis.peens@...ronome.com" <louis.peens@...ronome.com>,
"oss-drivers@...ronome.com" <oss-drivers@...ronome.com>
Subject: Re: [RFC net-next 1/1] net: sched: prevent duplicate flower rules
from tcf_proto destroy race
On Thu 24 Oct 2019 at 14:01, John Hurley <john.hurley@...ronome.com> wrote:
> When a new filter is added to cls_api, the function
> tcf_chain_tp_insert_unique() looks up the protocol/priority/chain to
> determine if the tcf_proto is duplicated in the chain's hashtable. It then
> creates a new entry or continues with an existing one. In cls_flower, this
> allows the function fl_ht_insert_unique to determine if a filter is a
> duplicate and reject appropriately, meaning that the duplicate will not be
> passed to drivers via the offload hooks. However, when a tcf_proto is
> destroyed it is removed from its chain before a hardware remove hook is
> hit. This can lead to a race whereby the driver has not received the
> remove message but duplicate flows can be accepted. This, in turn, can
> lead to the offload driver receiving incorrect duplicate flows and out of
> order add/delete messages.
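
Just to confirm I'm reading the race the same way, this is roughly the
interleaving I have in mind (my reading of the code paths, not an actual
trace):

    CPU0 (delete/flush path)          CPU1 (add path)
    ------------------------          ---------------
    tcf_chain_tp_remove()
      tp unlinked from chain
                                      tcf_chain_tp_insert_unique()
                                        no matching tp found,
                                        new tp created
                                      fl_change()
                                        fl_ht_insert_unique() succeeds
                                        hw add offloaded -> duplicate
                                        rule in the driver
    tp->ops->destroy() (fl_destroy)
      hw remove offloaded only now,
      after the duplicate add
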
>
> Prevent duplicates by utilising an approach suggested by Vlad Buslov. A
> hash table per block stores each unique chain/protocol/prio being
> destroyed. This entry is only removed when the full destroy (and hardware
> offload) has completed. If a new flow is being added with the same
> identifiers as a tcf_proto being destroyed, then the add request is replayed
> until the destroy is complete.
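
(Side note for context, paraphrasing tc_new_tfilter() from memory rather
than quoting it: the replay works because the add path already retries on
-EAGAIN, along the lines of

	replay:
		...
		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol,
						prio, rtnl_held);
		...
		if (err == -EAGAIN)
			goto replay;	/* re-run the whole add request */

so returning -EAGAIN from tcf_chain_tp_insert_unique() below should be
enough to keep the request spinning until the destroy has finished.)
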
>
> Fixes: 8b64678e0af8 ("net: sched: refactor tp insert/delete for concurrent execution")
> Signed-off-by: John Hurley <john.hurley@...ronome.com>
> Reviewed-by: Simon Horman <simon.horman@...ronome.com>
> Reported-by: Louis Peens <louis.peens@...ronome.com>
> ---
Hi John,
Thanks again for doing this!
> include/net/sch_generic.h | 3 ++
> net/sched/cls_api.c | 108 ++++++++++++++++++++++++++++++++++++++++++++--
> 2 files changed, 107 insertions(+), 4 deletions(-)
>
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 637548d..363d2de 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -414,6 +414,9 @@ struct tcf_block {
> struct list_head filter_chain_list;
> } chain0;
> struct rcu_head rcu;
> + struct rhashtable proto_destroy_ht;
> + struct rhashtable_params proto_destroy_ht_params;
> + struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
> };
>
> #ifdef CONFIG_PROVE_LOCKING
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index 8717c0b..7f7095a 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -47,6 +47,77 @@ static LIST_HEAD(tcf_proto_base);
> /* Protects list of registered TC modules. It is pure SMP lock. */
> static DEFINE_RWLOCK(cls_mod_lock);
>
> +struct tcf_destroy_proto {
> + struct destroy_key {
> + u32 chain_index;
> + u32 prio;
> + __be16 protocol;
> + } key;
> + struct rhash_head ht_node;
> +};
> +
> +static const struct rhashtable_params destroy_ht_params = {
> + .key_offset = offsetof(struct tcf_destroy_proto, key),
> + .key_len = offsetofend(struct destroy_key, protocol),
> + .head_offset = offsetof(struct tcf_destroy_proto, ht_node),
> + .automatic_shrinking = true,
> +};
> +
> +static void
> +tcf_proto_signal_destroying(struct tcf_chain *chain, struct tcf_proto *tp)
> +{
> + struct tcf_block *block = chain->block;
> + struct tcf_destroy_proto *entry;
> +
> + entry = kzalloc(sizeof(*entry), GFP_KERNEL);
Did you consider putting the tcf_proto instance itself into
block->proto_destroy_ht? That might simplify the code, since the entry is
destroyed together with the tcf_proto by tcf_proto_destroy() anyway. If
such an approach is possible (I might be missing some obvious corner
cases), it might also be a good idea to use the good old intrusive hash
table from hashtable.h, which would let the whole solution avoid dynamic
memory allocation altogether.
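
Something like the following is what I have in mind - a completely
untested sketch, with the new struct members and the hash helper invented
for illustration (needs <linux/hashtable.h> and <linux/jhash.h>):

	/* hypothetical new member in struct tcf_proto, INIT_HLIST_NODE()'d
	 * in tcf_proto_create() so that hash_del() below is a no-op for a
	 * tp that was never signalled
	 */
	struct hlist_node	destroy_ht_node;

	/* hypothetical replacement for the rhashtable in struct tcf_block,
	 * 2^7 = 128 buckets, hash_init()'d in tcf_block_create()
	 */
	DECLARE_HASHTABLE(proto_destroy_ht, 7);

	static u32 tcf_proto_destroy_hash(u32 chain_index, u32 prio,
					  __be16 protocol)
	{
		return jhash_3words(chain_index, prio,
				    (__force u32)protocol, 0);
	}

	static void tcf_proto_signal_destroying(struct tcf_chain *chain,
						struct tcf_proto *tp)
	{
		struct tcf_block *block = chain->block;
		u32 h = tcf_proto_destroy_hash(chain->index, tp->prio,
					       tp->protocol);

		mutex_lock(&block->proto_destroy_lock);
		/* node lives in tp itself, so no allocation and no error
		 * path here
		 */
		hash_add(block->proto_destroy_ht, &tp->destroy_ht_node, h);
		mutex_unlock(&block->proto_destroy_lock);
	}

	static void tcf_proto_signal_destroyed(struct tcf_chain *chain,
					       struct tcf_proto *tp)
	{
		struct tcf_block *block = chain->block;

		mutex_lock(&block->proto_destroy_lock);
		hash_del(&tp->destroy_ht_node);
		mutex_unlock(&block->proto_destroy_lock);
	}

The lookup on the insert path would then hash_for_each_possible() over
the bucket and compare chain index/prio/protocol explicitly.
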
> + if (!entry)
> + return;
> +
> + entry->key.chain_index = chain->index;
> + entry->key.prio = tp->prio;
> + entry->key.protocol = tp->protocol;
> +
> + mutex_lock(&block->proto_destroy_lock);
> + /* If key already exists or lookup errors out then free new entry. */
> + if (rhashtable_lookup_get_insert_fast(&block->proto_destroy_ht,
> + &entry->ht_node,
> + block->proto_destroy_ht_params))
> + kfree(entry);
> + mutex_unlock(&block->proto_destroy_lock);
> +}
> +
> +static struct tcf_destroy_proto *
> +tcf_proto_lookup_destroying(struct tcf_block *block, u32 chain_idx, u32 prio,
> + __be16 proto)
> +{
> + struct destroy_key key;
> +
> + key.chain_index = chain_idx;
> + key.prio = prio;
> + key.protocol = proto;
> +
> + return rhashtable_lookup_fast(&block->proto_destroy_ht, &key,
> + block->proto_destroy_ht_params);
> +}
> +
> +static void
> +tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
> +{
> + struct tcf_block *block = chain->block;
> + struct tcf_destroy_proto *entry;
> +
> + mutex_lock(&block->proto_destroy_lock);
> + entry = tcf_proto_lookup_destroying(block, chain->index, tp->prio,
> + tp->protocol);
> + if (entry) {
> + rhashtable_remove_fast(&block->proto_destroy_ht,
> + &entry->ht_node,
> + block->proto_destroy_ht_params);
> + kfree(entry);
> + }
> + mutex_unlock(&block->proto_destroy_lock);
> +}
> +
> /* Find classifier type by string name */
>
> static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
> @@ -234,9 +305,11 @@ static void tcf_proto_get(struct tcf_proto *tp)
> static void tcf_chain_put(struct tcf_chain *chain);
>
> static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
> - struct netlink_ext_ack *extack)
> + bool sig_destroy, struct netlink_ext_ack *extack)
> {
> tp->ops->destroy(tp, rtnl_held, extack);
> + if (sig_destroy)
> + tcf_proto_signal_destroyed(tp->chain, tp);
> tcf_chain_put(tp->chain);
> module_put(tp->ops->owner);
> kfree_rcu(tp, rcu);
> @@ -246,7 +319,7 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
> struct netlink_ext_ack *extack)
> {
> if (refcount_dec_and_test(&tp->refcnt))
> - tcf_proto_destroy(tp, rtnl_held, extack);
> + tcf_proto_destroy(tp, rtnl_held, true, extack);
> }
>
> static int walker_check_empty(struct tcf_proto *tp, void *fh,
> @@ -370,6 +443,8 @@ static bool tcf_chain_detach(struct tcf_chain *chain)
> static void tcf_block_destroy(struct tcf_block *block)
> {
> mutex_destroy(&block->lock);
> + rhashtable_destroy(&block->proto_destroy_ht);
> + mutex_destroy(&block->proto_destroy_lock);
> kfree_rcu(block, rcu);
> }
>
> @@ -545,6 +620,12 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
>
> mutex_lock(&chain->filter_chain_lock);
> tp = tcf_chain_dereference(chain->filter_chain, chain);
> + while (tp) {
> + tp_next = rcu_dereference_protected(tp->next, 1);
> + tcf_proto_signal_destroying(chain, tp);
> + tp = tp_next;
> + }
> + tp = tcf_chain_dereference(chain->filter_chain, chain);
> RCU_INIT_POINTER(chain->filter_chain, NULL);
> tcf_chain0_head_change(chain, NULL);
> chain->flushing = true;
> @@ -857,6 +938,16 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
> /* Don't store q pointer for blocks which are shared */
> if (!tcf_block_shared(block))
> block->q = q;
> +
> + block->proto_destroy_ht_params = destroy_ht_params;
> + if (rhashtable_init(&block->proto_destroy_ht,
> + &block->proto_destroy_ht_params)) {
> + NL_SET_ERR_MSG(extack, "Failed to initialise block proto destroy hashtable");
> + kfree(block);
> + return ERR_PTR(-ENOMEM);
> + }
> + mutex_init(&block->proto_destroy_lock);
> +
> return block;
> }
>
> @@ -1621,6 +1712,13 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
>
> mutex_lock(&chain->filter_chain_lock);
>
> + if (tcf_proto_lookup_destroying(chain->block, chain->index, prio,
> + protocol)) {
Earlier in the patch, tcf_proto_lookup_destroying() is called under
block->proto_destroy_lock. I assume the lock is also needed here.
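
I.e. something along these lines (untested):

	bool being_destroyed;

	mutex_lock(&chain->block->proto_destroy_lock);
	being_destroyed = tcf_proto_lookup_destroying(chain->block,
						      chain->index, prio,
						      protocol) != NULL;
	mutex_unlock(&chain->block->proto_destroy_lock);
	if (being_destroyed) {
		mutex_unlock(&chain->filter_chain_lock);
		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
		return ERR_PTR(-EAGAIN);
	}

Lock ordering looks fine to me since tcf_chain_flush() in this patch
already takes proto_destroy_lock while holding filter_chain_lock.
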
> + mutex_unlock(&chain->filter_chain_lock);
> + tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
> + return ERR_PTR(-EAGAIN);
> + }
> +
> tp = tcf_chain_tp_find(chain, &chain_info,
> protocol, prio, false);
> if (!tp)
> @@ -1628,10 +1726,10 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
> mutex_unlock(&chain->filter_chain_lock);
>
> if (tp) {
> - tcf_proto_destroy(tp_new, rtnl_held, NULL);
> + tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
> tp_new = tp;
> } else if (err) {
> - tcf_proto_destroy(tp_new, rtnl_held, NULL);
> + tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
> tp_new = ERR_PTR(err);
> }
>
> @@ -1669,6 +1767,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
> return;
> }
>
> + tcf_proto_signal_destroying(chain, tp);
> next = tcf_chain_dereference(chain_info.next, chain);
> if (tp == chain->filter_chain)
> tcf_chain0_head_change(chain, next);
> @@ -2188,6 +2287,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
> err = -EINVAL;
> goto errout_locked;
> } else if (t->tcm_handle == 0) {
> + tcf_proto_signal_destroying(chain, tp);
> tcf_chain_tp_remove(chain, &chain_info, tp);
> mutex_unlock(&chain->filter_chain_lock);