lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Wed,  3 Apr 2019 13:37:05 +0100
From:   John Hurley <john.hurley@...ronome.com>
To:     jiri@...lanox.com, davem@...emloft.net, xiyou.wangcong@...il.com
Cc:     netdev@...r.kernel.org, vladbu@...lanox.com,
        oss-drivers@...ronome.com, John Hurley <john.hurley@...ronome.com>
Subject: [RFC net-next 1/1] net: sched: fix hw filter offload in tc flower

Recent refactoring of fl_change aims to use the classifier spinlock to
avoid the need for rtnl lock. In doing so, the fl_hw_replace_filer()
function was moved to before the lock is taken. This can create problems
for drivers if duplicate filters are created (commmon in ovs tc offload
due to filters being triggered by user-space matches).

Drivers registered for such filters will now receive multiple copies of
the same rule, each with a different cookie value. This means that the
drivers would need to do a full match field lookup to determine
duplicates, repeating work that will happen in flower __fl_lookup().
Currently, drivers do not expect to receive duplicate filters.

Fix this by moving the hw_replace_function to after the software filter
processing. This way, offload messages are only triggered after they are
verified. Add a new flag to each filter that indicates that it is being
sent to hw. This prevents the flow from being deleted before processing is
finished, even if the tp lock is released.

NOTE:
There may still be a race condition here with the reoffload function. When
the __SENDING_TO_HW bit is set we do not know if the filter has actually
been sent to HW or not at time of reoffload. This means we could
potentially not offload a valid flow here (or not delete one).

Fixes: 620da4860827 ("net: sched: flower: refactor fl_change")
Signed-off-by: John Hurley <john.hurley@...ronome.com>
Reviewed-by: Simon Horman <simon.horman@...ronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@...ronome.com>
---
 net/sched/cls_flower.c | 110 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 90 insertions(+), 20 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 0638f17..e2c144f 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -94,6 +94,10 @@ struct cls_fl_head {
 	struct idr handle_idr;
 };
 
+enum cls_fl_filter_state_t {
+	__SENDING_TO_HW,
+};
+
 struct cls_fl_filter {
 	struct fl_flow_mask *mask;
 	struct rhash_head ht_node;
@@ -113,6 +117,7 @@ struct cls_fl_filter {
 	 */
 	refcount_t refcnt;
 	bool deleted;
+	unsigned long atomic_flags;
 };
 
 static const struct rhashtable_params mask_ht_params = {
@@ -542,12 +547,18 @@ static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
 
 	*last = false;
 
+replay:
 	spin_lock(&tp->lock);
 	if (f->deleted) {
 		spin_unlock(&tp->lock);
 		return -ENOENT;
 	}
 
+	if (test_bit(__SENDING_TO_HW, &f->atomic_flags)) {
+		spin_unlock(&tp->lock);
+		goto replay;
+	}
+
 	f->deleted = true;
 	rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
 			       f->mask->filter_ht_params);
@@ -1528,15 +1539,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	if (err)
 		goto errout;
 
-	if (!tc_skip_hw(fnew->flags)) {
-		err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
-		if (err)
-			goto errout_mask;
-	}
-
-	if (!tc_in_hw(fnew->flags))
-		fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
-
 	spin_lock(&tp->lock);
 
 	/* tp was deleted concurrently. -EAGAIN will cause caller to lookup
@@ -1544,15 +1546,18 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	 */
 	if (tp->deleting) {
 		err = -EAGAIN;
-		goto errout_hw;
+		goto errout_mask;
 	}
 
 	refcount_inc(&fnew->refcnt);
 	if (fold) {
-		/* Fold filter was deleted concurrently. Retry lookup. */
-		if (fold->deleted) {
+		/* Fold filter was deleted or is being added to hw concurrently.
+		 * Retry lookup.
+		 */
+		if (fold->deleted ||
+		    test_bit(__SENDING_TO_HW, &fold->atomic_flags)) {
 			err = -EAGAIN;
-			goto errout_hw;
+			goto errout_mask;
 		}
 
 		fnew->handle = handle;
@@ -1560,7 +1565,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
 					     fnew->mask->filter_ht_params);
 		if (err)
-			goto errout_hw;
+			goto errout_mask;
 
 		rhashtable_remove_fast(&fold->mask->ht,
 				       &fold->ht_node,
@@ -1568,9 +1573,23 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		idr_replace(&head->handle_idr, fnew, fnew->handle);
 		list_replace_rcu(&fold->list, &fnew->list);
 		fold->deleted = true;
+		if (!tc_skip_hw(fnew->flags))
+			set_bit(__SENDING_TO_HW, &fnew->atomic_flags);
 
 		spin_unlock(&tp->lock);
 
+		if (!tc_skip_hw(fnew->flags)) {
+			err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
+			if (err) {
+				spin_lock(&tp->lock);
+				goto errout_hw_replace;
+			}
+		}
+
+		if (!tc_in_hw(fnew->flags))
+			fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+
+		clear_bit(__SENDING_TO_HW, &fnew->atomic_flags);
 		fl_mask_put(head, fold->mask, true);
 		if (!tc_skip_hw(fold->flags))
 			fl_hw_destroy_filter(tp, fold, rtnl_held, NULL);
@@ -1584,7 +1603,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	} else {
 		if (__fl_lookup(fnew->mask, &fnew->mkey)) {
 			err = -EEXIST;
-			goto errout_hw;
+			goto errout_mask;
 		}
 
 		if (handle) {
@@ -1606,7 +1625,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 					    INT_MAX, GFP_ATOMIC);
 		}
 		if (err)
-			goto errout_hw;
+			goto errout_mask;
 
 		fnew->handle = handle;
 
@@ -1616,7 +1635,22 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 			goto errout_idr;
 
 		list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
+		if (!tc_skip_hw(fnew->flags))
+			set_bit(__SENDING_TO_HW, &fnew->atomic_flags);
+
 		spin_unlock(&tp->lock);
+
+		if (!tc_skip_hw(fnew->flags)) {
+			err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
+			if (err) {
+				spin_lock(&tp->lock);
+				goto errout_rhash;
+			}
+		}
+
+		if (!tc_in_hw(fnew->flags))
+			fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+		clear_bit(__SENDING_TO_HW, &fnew->atomic_flags);
 	}
 
 	*arg = fnew;
@@ -1625,13 +1659,37 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	kfree(mask);
 	return 0;
 
+errout_hw_replace:
+	if (rhashtable_insert_fast(&fold->mask->ht, &fold->ht_node,
+				   fold->mask->filter_ht_params)) {
+		NL_SET_ERR_MSG(extack, "Filter replace failed and could not revert.");
+		fl_mask_put(head, fold->mask, true);
+		if (!tc_skip_hw(fold->flags))
+			fl_hw_destroy_filter(tp, fold, rtnl_held, NULL);
+		tcf_unbind_filter(tp, &fold->res);
+		tcf_exts_get_net(&fold->exts);
+		refcount_dec(&fold->refcnt);
+		__fl_put(fold);
+	} else {
+		idr_replace(&head->handle_idr, fold, fold->handle);
+		list_replace_rcu(&fnew->list, &fold->list);
+		fold->deleted = false;
+	}
+errout_rhash:
+	if (fnew->deleted) {
+		spin_unlock(&tp->lock);
+		kfree(tb);
+		kfree(mask);
+		return err;
+	}
+	list_del_rcu(&fnew->list);
+	rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
+			       fnew->mask->filter_ht_params);
+	fnew->deleted = true;
 errout_idr:
 	idr_remove(&head->handle_idr, fnew->handle);
-errout_hw:
-	spin_unlock(&tp->lock);
-	if (!tc_skip_hw(fnew->flags))
-		fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL);
 errout_mask:
+	spin_unlock(&tp->lock);
 	fl_mask_put(head, fnew->mask, true);
 errout:
 	tcf_exts_destroy(&fnew->exts);
@@ -1669,6 +1727,12 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
 	arg->count = arg->skip;
 
 	while ((f = fl_get_next_filter(tp, &arg->cookie)) != NULL) {
+		if (test_bit(__SENDING_TO_HW, &f->atomic_flags)) {
+			__fl_put(f);
+			arg->cookie++;
+			continue;
+		}
+
 		if (arg->fn(tp, f, arg) < 0) {
 			__fl_put(f);
 			arg->stop = 1;
@@ -1695,6 +1759,9 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 			if (tc_skip_hw(f->flags))
 				continue;
 
+			if (test_bit(__SENDING_TO_HW, &f->atomic_flags))
+				continue;
+
 			cls_flower.rule =
 				flow_rule_alloc(tcf_exts_num_actions(&f->exts));
 			if (!cls_flower.rule)
@@ -2273,6 +2340,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	if (!f)
 		return skb->len;
 
+	if (test_bit(__SENDING_TO_HW, &f->atomic_flags))
+		return skb->len;
+
 	t->tcm_handle = f->handle;
 
 	nest = nla_nest_start(skb, TCA_OPTIONS);
-- 
2.7.4

Powered by blists - more mailing lists