Date:   Tue, 16 Apr 2019 17:20:47 +0300
From:   Vlad Buslov <vladbu@...lanox.com>
To:     jakub.kicinski@...ronome.com
Cc:     netdev@...r.kernel.org, jhs@...atatu.com, xiyou.wangcong@...il.com,
        jiri@...nulli.us, davem@...emloft.net,
        Vlad Buslov <vladbu@...lanox.com>
Subject: [RFC PATCH net-next] net: sched: flower: refactor reoffload for concurrent access

Recent changes that introduced unlocked flower did not properly account
for the case when reoffload is initiated concurrently with filter
updates. To fix the issue, extend flower with a 'hw_filters' list that
is used to store filters that don't have the 'skip_hw' flag set. A
filter is added to the list before it is inserted into hardware and is
only removed from it after the last reference to the filter has been
released. This ensures that a concurrent reoffload can still access a
filter that is being deleted, and removes the race condition where the
driver callback could be removed while the filter is no longer
accessible through the idr but is still present in hardware.
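
As an illustration of the intended lifetime, here is a minimal
userspace model of the scheme (a pthread mutex stands in for tp->lock,
C11 atomics for refcount_t, and a hand-rolled linked list for
list_head; all names here are illustrative and not part of the patch):

  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stddef.h>

  static pthread_mutex_t tp_lock = PTHREAD_MUTEX_INITIALIZER; /* tp->lock */

  struct filter {
      atomic_int refcnt;       /* models refcount_t f->refcnt */
      bool deleted;            /* models f->deleted */
      struct filter *next;     /* models the f->hw_list linkage */
      struct filter **pprev;
  };

  static struct filter *hw_filters; /* models head->hw_filters */

  /* Link the filter before offloading it, under the lock. */
  static void hw_list_add(struct filter *f)
  {
      pthread_mutex_lock(&tp_lock);
      f->pprev = &hw_filters;
      f->next = hw_filters;
      if (f->next)
          f->next->pprev = &f->next;
      hw_filters = f;
      pthread_mutex_unlock(&tp_lock);
  }

  /* Drop one reference; unlink only when the last reference goes
   * away, mirroring the reworked __fl_put(). The forward pointer is
   * left intact and memory reclaim is assumed to be deferred (the
   * kernel queues RCU work), so a concurrent walker never sees freed
   * memory. */
  static void filter_put(struct filter *f)
  {
      if (atomic_fetch_sub(&f->refcnt, 1) != 1)
          return;
      pthread_mutex_lock(&tp_lock);
      if (f->pprev) {
          *f->pprev = f->next;
          if (f->next)
              f->next->pprev = f->pprev;
          f->pprev = NULL; /* "not on list", like the list_empty() check */
      }
      pthread_mutex_unlock(&tp_lock);
      /* defer the actual free, as fl_destroy_filter_work does */
  }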

Refactor fl_change() to insert the filter into hw_list before
offloading it to hardware and to properly clean the filter up with
__fl_put() in case of error. This allows concurrent access to the
filter from fl_reoffload() and protects it with reference counting.
Refactor fl_reoffload() to iterate over the hw_filters list instead of
the idr. Implement the fl_get_next_hw_filter() helper function that
iterates over the hw_filters list with reference counting, skipping
filters that are being concurrently deleted.
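
The lookup can be sketched in the same userspace model: take the lock,
continue from the cursor, and skip entries whose refcount has already
dropped to zero, the analogue of refcount_inc_not_zero(). This assumes
reclaim of unlinked nodes is deferred past any concurrent lookup, as
RCU guarantees in the kernel; get_next_hw_filter() is an illustrative
name, not code from the patch:

  /* Models refcount_inc_not_zero(): take a reference unless the
   * count has already reached zero. */
  static bool filter_get_unless_zero(struct filter *f)
  {
      int c = atomic_load(&f->refcnt);
      while (c != 0) {
          if (atomic_compare_exchange_weak(&f->refcnt, &c, c + 1))
              return true;
      }
      return false;
  }

  /* Cursor-style lookup mirroring fl_get_next_hw_filter(): 'cur' is
   * NULL on the first call; later calls resume from the previously
   * returned element, whose node is assumed to still be readable. */
  static struct filter *get_next_hw_filter(struct filter *cur, bool add)
  {
      struct filter *f;

      pthread_mutex_lock(&tp_lock);
      for (f = cur ? cur->next : hw_filters; f; f = f->next) {
          if (add && f->deleted)
              continue;
          if (filter_get_unless_zero(f)) {
              pthread_mutex_unlock(&tp_lock);
              return f;
          }
      }
      pthread_mutex_unlock(&tp_lock);
      return NULL;
  }

fl_reoffload() below consumes the helper in exactly this shape: start
from f = NULL, handle each returned filter, and drop the reference
with __fl_put() at the end of every iteration.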

Fixes: 92149190067d ("net: sched: flower: set unlocked flag for flower proto ops")
Signed-off-by: Vlad Buslov <vladbu@...lanox.com>
---
 net/sched/cls_flower.c | 82 +++++++++++++++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 21 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 4b5585358699..e0f921c537ab 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -90,6 +90,7 @@ struct cls_fl_head {
 	struct rhashtable ht;
 	spinlock_t masks_lock; /* Protect masks list */
 	struct list_head masks;
+	struct list_head hw_filters;
 	struct rcu_work rwork;
 	struct idr handle_idr;
 };
@@ -102,6 +103,7 @@ struct cls_fl_filter {
 	struct tcf_result res;
 	struct fl_flow_key key;
 	struct list_head list;
+	struct list_head hw_list;
 	u32 handle;
 	u32 flags;
 	u32 in_hw_count;
@@ -315,6 +317,7 @@ static int fl_init(struct tcf_proto *tp)
 
 	spin_lock_init(&head->masks_lock);
 	INIT_LIST_HEAD_RCU(&head->masks);
+	INIT_LIST_HEAD(&head->hw_filters);
 	rcu_assign_pointer(tp->root, head);
 	idr_init(&head->handle_idr);
 
@@ -485,12 +488,15 @@ static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
 	return rcu_dereference_raw(tp->root);
 }
 
-static void __fl_put(struct cls_fl_filter *f)
+static void __fl_put(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
 	if (!refcount_dec_and_test(&f->refcnt))
 		return;
 
-	WARN_ON(!f->deleted);
+	spin_lock(&tp->lock);
+	if (!list_empty(&f->hw_list))
+		list_del(&f->hw_list);
+	spin_unlock(&tp->lock);
 
 	if (tcf_exts_get_net(&f->exts))
 		tcf_queue_work(&f->rwork, fl_destroy_filter_work);
@@ -554,7 +560,7 @@ static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
 	if (!tc_skip_hw(f->flags))
 		fl_hw_destroy_filter(tp, f, rtnl_held, extack);
 	tcf_unbind_filter(tp, &f->res);
-	__fl_put(f);
+	__fl_put(tp, f);
 
 	return 0;
 }
@@ -595,7 +601,7 @@ static void fl_put(struct tcf_proto *tp, void *arg)
 {
 	struct cls_fl_filter *f = arg;
 
-	__fl_put(f);
+	__fl_put(tp, f);
 }
 
 static void *fl_get(struct tcf_proto *tp, u32 handle)
@@ -1522,6 +1528,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		err = -ENOBUFS;
 		goto errout_tb;
 	}
+	INIT_LIST_HEAD(&fnew->hw_list);
 	refcount_set(&fnew->refcnt, 1);
 
 	err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
@@ -1551,6 +1558,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		goto errout_mask;
 
 	if (!tc_skip_hw(fnew->flags)) {
+		spin_lock(&tp->lock);
+		list_add(&fnew->hw_list, &head->hw_filters);
+		spin_unlock(&tp->lock);
+
 		err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
 		if (err)
 			goto errout_ht;
@@ -1569,7 +1580,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		goto errout_hw;
 	}
 
-	refcount_inc(&fnew->refcnt);
 	if (fold) {
 		/* Fold filter was deleted concurrently. Retry lookup. */
 		if (fold->deleted) {
@@ -1591,6 +1601,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 			in_ht = true;
 		}
 
+		refcount_inc(&fnew->refcnt);
 		rhashtable_remove_fast(&fold->mask->ht,
 				       &fold->ht_node,
 				       fold->mask->filter_ht_params);
@@ -1608,7 +1619,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		 * after this.
 		 */
 		refcount_dec(&fold->refcnt);
-		__fl_put(fold);
+		__fl_put(tp, fold);
 	} else {
 		if (handle) {
 			/* user specifies a handle and it doesn't exist */
@@ -1631,6 +1642,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		if (err)
 			goto errout_hw;
 
+		refcount_inc(&fnew->refcnt);
 		fnew->handle = handle;
 		list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
 		spin_unlock(&tp->lock);
@@ -1642,26 +1654,27 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	kfree(mask);
 	return 0;
 
+errout_ht:
+	spin_lock(&tp->lock);
 errout_hw:
+	fnew->deleted = true;
 	spin_unlock(&tp->lock);
 	if (!tc_skip_hw(fnew->flags))
 		fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL);
-errout_ht:
 	if (in_ht)
 		rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
 				       fnew->mask->filter_ht_params);
 errout_mask:
 	fl_mask_put(head, fnew->mask);
 errout:
-	tcf_exts_get_net(&fnew->exts);
-	tcf_queue_work(&fnew->rwork, fl_destroy_filter_work);
+	__fl_put(tp, fnew);
 errout_tb:
 	kfree(tb);
 errout_mask_alloc:
 	kfree(mask);
 errout_fold:
 	if (fold)
-		__fl_put(fold);
+		__fl_put(tp, fold);
 	return err;
 }
 
@@ -1675,7 +1688,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
 
 	err = __fl_delete(tp, f, &last_on_mask, rtnl_held, extack);
 	*last = list_empty(&head->masks);
-	__fl_put(f);
+	__fl_put(tp, f);
 
 	return err;
 }
@@ -1689,33 +1702,61 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
 
 	while ((f = fl_get_next_filter(tp, &arg->cookie)) != NULL) {
 		if (arg->fn(tp, f, arg) < 0) {
-			__fl_put(f);
+			__fl_put(tp, f);
 			arg->stop = 1;
 			break;
 		}
-		__fl_put(f);
+		__fl_put(tp, f);
 		arg->cookie++;
 		arg->count++;
 	}
 }
 
+static struct cls_fl_filter *
+fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
+{
+	struct cls_fl_head *head = fl_head_dereference(tp);
+
+	spin_lock(&tp->lock);
+	if (!f) {
+		if (list_empty(&head->hw_filters)) {
+			spin_unlock(&tp->lock);
+			return NULL;
+		}
+
+		f = list_first_entry(&head->hw_filters, struct cls_fl_filter,
+				     hw_list);
+	} else {
+		f = list_next_entry(f, hw_list);
+	}
+
+	list_for_each_entry_from(f, &head->hw_filters, hw_list) {
+		if (!(add && f->deleted) && refcount_inc_not_zero(&f->refcnt)) {
+			spin_unlock(&tp->lock);
+			return f;
+		}
+	}
+
+	spin_unlock(&tp->lock);
+	return NULL;
+}
+
 static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 			void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct tc_cls_flower_offload cls_flower = {};
 	struct tcf_block *block = tp->chain->block;
-	unsigned long handle = 0;
-	struct cls_fl_filter *f;
+	struct cls_fl_filter *f = NULL;
 	int err;
 
-	while ((f = fl_get_next_filter(tp, &handle))) {
+	while ((f = fl_get_next_hw_filter(tp, f, add))) {
 		if (tc_skip_hw(f->flags))
 			goto next_flow;
 
 		cls_flower.rule =
 			flow_rule_alloc(tcf_exts_num_actions(&f->exts));
 		if (!cls_flower.rule) {
-			__fl_put(f);
+			__fl_put(tp, f);
 			return -ENOMEM;
 		}
 
@@ -1733,7 +1774,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 			kfree(cls_flower.rule);
 			if (tc_skip_sw(f->flags)) {
 				NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
-				__fl_put(f);
+				__fl_put(tp, f);
 				return err;
 			}
 			goto next_flow;
@@ -1746,7 +1787,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 
 		if (err) {
 			if (add && tc_skip_sw(f->flags)) {
-				__fl_put(f);
+				__fl_put(tp, f);
 				return err;
 			}
 			goto next_flow;
@@ -1757,8 +1798,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 					  add);
 		spin_unlock(&tp->lock);
 next_flow:
-		handle++;
-		__fl_put(f);
+		__fl_put(tp, f);
 	}
 
 	return 0;
-- 
2.21.0
