Message-ID: <vbfr25e49jh.fsf@mellanox.com>
Date:   Wed, 21 Aug 2019 19:12:53 +0000
From:   Vlad Buslov <vladbu@...lanox.com>
To:     Matthew Wilcox <willy@...radead.org>
CC:     "netdev@...r.kernel.org" <netdev@...r.kernel.org>
Subject: Re: [PATCH 31/38] cls_flower: Use XArray marks instead of separate
 list


On Wed 21 Aug 2019 at 01:32, Matthew Wilcox <willy@...radead.org> wrote:
> From: "Matthew Wilcox (Oracle)" <willy@...radead.org>
>
> Remove the hw_filter list in favour of using one of the XArray mark
> bits which lets us iterate more efficiently than walking a linked list.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@...radead.org>
> ---
>  net/sched/cls_flower.c | 47 ++++++++++--------------------------------
>  1 file changed, 11 insertions(+), 36 deletions(-)
>
> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
> index 2a1999d2b507..4625de5e29a7 100644
> --- a/net/sched/cls_flower.c
> +++ b/net/sched/cls_flower.c
> @@ -85,11 +85,12 @@ struct fl_flow_tmplt {
>  	struct tcf_chain *chain;
>  };
>
> +#define HW_FILTER	XA_MARK_1
> +
>  struct cls_fl_head {
>  	struct rhashtable ht;
>  	spinlock_t masks_lock; /* Protect masks list */
>  	struct list_head masks;
> -	struct list_head hw_filters;
>  	struct rcu_work rwork;
>  	struct xarray filters;
>  };
> @@ -102,7 +103,6 @@ struct cls_fl_filter {
>  	struct tcf_result res;
>  	struct fl_flow_key key;
>  	struct list_head list;
> -	struct list_head hw_list;
>  	u32 handle;
>  	u32 flags;
>  	u32 in_hw_count;
> @@ -332,7 +332,6 @@ static int fl_init(struct tcf_proto *tp)
>
>  	spin_lock_init(&head->masks_lock);
>  	INIT_LIST_HEAD_RCU(&head->masks);
> -	INIT_LIST_HEAD(&head->hw_filters);
>  	rcu_assign_pointer(tp->root, head);
>  	xa_init_flags(&head->filters, XA_FLAGS_ALLOC1);
>
> @@ -421,7 +420,6 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
>
>  	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
>  	spin_lock(&tp->lock);
> -	list_del_init(&f->hw_list);
>  	tcf_block_offload_dec(block, &f->flags);
>  	spin_unlock(&tp->lock);
>
> @@ -433,7 +431,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
>  				struct cls_fl_filter *f, bool rtnl_held,
>  				struct netlink_ext_ack *extack)
>  {
> -	struct cls_fl_head *head = fl_head_dereference(tp);
>  	struct tcf_block *block = tp->chain->block;
>  	struct flow_cls_offload cls_flower = {};
>  	bool skip_sw = tc_skip_sw(f->flags);
> @@ -485,9 +482,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
>  		goto errout;
>  	}
>
> -	spin_lock(&tp->lock);
> -	list_add(&f->hw_list, &head->hw_filters);
> -	spin_unlock(&tp->lock);
>  errout:
>  	if (!rtnl_held)
>  		rtnl_unlock();
> @@ -1581,7 +1575,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>  		err = -ENOBUFS;
>  		goto errout_tb;
>  	}
> -	INIT_LIST_HEAD(&fnew->hw_list);
>  	refcount_set(&fnew->refcnt, 1);
>
>  	err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
> @@ -1698,6 +1691,11 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
>
>  	*arg = fnew;
>
> +	if (!tc_skip_hw(fnew->flags))
> +		xa_set_mark(&head->filters, fnew->handle, HW_FILTER);
> +	else if (fold)
> +		xa_clear_mark(&head->filters, fnew->handle, HW_FILTER);
> +

I like how the xa mark simplifies reoffload handling, but this wouldn't
work anymore because, without rtnl protection, fl_change()/fl_delete()
can be called concurrently with fl_reoffload(). My original
implementation of the unlocked flower classifier relied on the idr in
fl_reoffload(), and we had to introduce hw_list due to the following
race conditions:

- fl_reoffload() can miss fnew if it runs after fnew has been
  provisioned to hardware with fl_hw_replace_filter() but before it is
  marked with HW_FILTER (see the sketch after this list).

- Another race is in __fl_delete(): the filter is removed from the
  xarray, then the shared block is detached concurrently, which causes
  fl_reoffload() to miss the filter; the block callback is then no
  longer present when fl_hw_destroy_filter() calls tc_setup_cb_call(),
  leaving a dangling filter that can't be removed from hardware
  anymore.
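
To make the first race concrete, here is roughly where the window sits
in fl_change() with this patch applied (code taken from the patch; the
interleaving comment and the elisions are mine):

	err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
	/* ... error handling and hashtable insertion elided ... */
	*arg = fnew;

	/*
	 * Window: a concurrent fl_reoffload() running here walks
	 * xa_for_each_marked(&head->filters, handle, f, HW_FILTER)
	 * and skips fnew - already offloaded, but not yet marked.
	 */
	if (!tc_skip_hw(fnew->flags))
		xa_set_mark(&head->filters, fnew->handle, HW_FILTER);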

That is why the filter must be added to hw_list where it is done now -
in the fl_hw_*() functions, while holding the rtnl lock to prevent
concurrent reoffload (block bind/unbind always takes rtnl). I guess
marking/unmarking filters as HW_FILTER in exactly the same places where
they are inserted into/removed from hw_list would also work.
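
Something along these lines (just an untested sketch; head would need
to be re-obtained here, as the hunk this patch removes used to do, and
fl_hw_destroy_filter() would need the same):

	static int fl_hw_replace_filter(struct tcf_proto *tp,
					struct cls_fl_filter *f, bool rtnl_held,
					struct netlink_ext_ack *extack)
	{
		struct cls_fl_head *head = fl_head_dereference(tp);
		...
		/* success path: rtnl is held here (taken above when
		 * !rtnl_held), so fl_reoffload() cannot run concurrently */
		spin_lock(&tp->lock);
		xa_set_mark(&head->filters, f->handle, HW_FILTER);
		spin_unlock(&tp->lock);
	errout:
		...
	}

and symmetrically in fl_hw_destroy_filter():

		spin_lock(&tp->lock);
		xa_clear_mark(&head->filters, f->handle, HW_FILTER);
		tcf_block_offload_dec(block, &f->flags);
		spin_unlock(&tp->lock);

with the xa_set_mark()/xa_clear_mark() calls in fl_change() dropped.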

>  	kfree(tb);
>  	tcf_queue_work(&mask->rwork, fl_uninit_mask_free_work);
>  	return 0;
> @@ -1770,37 +1768,14 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
>  	arg->cookie = id;
>  }
>
> -static struct cls_fl_filter *
> -fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
> -{
> -	struct cls_fl_head *head = fl_head_dereference(tp);
> -
> -	spin_lock(&tp->lock);
> -	if (list_empty(&head->hw_filters)) {
> -		spin_unlock(&tp->lock);
> -		return NULL;
> -	}
> -
> -	if (!f)
> -		f = list_entry(&head->hw_filters, struct cls_fl_filter,
> -			       hw_list);
> -	list_for_each_entry_continue(f, &head->hw_filters, hw_list) {
> -		if (!(add && f->deleted) && refcount_inc_not_zero(&f->refcnt)) {
> -			spin_unlock(&tp->lock);
> -			return f;
> -		}
> -	}
> -
> -	spin_unlock(&tp->lock);
> -	return NULL;
> -}
> -
>  static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
>  			void *cb_priv, struct netlink_ext_ack *extack)
>  {
> +	struct cls_fl_head *head = fl_head_dereference(tp);
>  	struct tcf_block *block = tp->chain->block;
>  	struct flow_cls_offload cls_flower = {};
> -	struct cls_fl_filter *f = NULL;
> +	struct cls_fl_filter *f;
> +	unsigned long handle;
>  	int err;
>
>  	/* hw_filters list can only be changed by hw offload functions after
> @@ -1809,7 +1784,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
>  	 */
>  	ASSERT_RTNL();
>
> -	while ((f = fl_get_next_hw_filter(tp, f, add))) {
> +	xa_for_each_marked(&head->filters, handle, f, HW_FILTER) {
>  		cls_flower.rule =
>  			flow_rule_alloc(tcf_exts_num_actions(&f->exts));
>  		if (!cls_flower.rule) {
