Message-ID: <1471986499.14381.56.camel@edumazet-glaptop3.roam.corp.google.com>
Date:   Tue, 23 Aug 2016 14:08:19 -0700
From:   Eric Dumazet <eric.dumazet@...il.com>
To:     John Fastabend <john.fastabend@...il.com>
Cc:     jhs@...atatu.com, davem@...emloft.net, brouer@...hat.com,
        xiyou.wangcong@...il.com, alexei.starovoitov@...il.com,
        john.r.fastabend@...el.com, netdev@...r.kernel.org
Subject: Re: [net-next PATCH 02/15] net: sched: allow qdiscs to handle
 locking

On Tue, 2016-08-23 at 13:23 -0700, John Fastabend wrote:
> This patch adds a flag for queueing disciplines to indicate that the
> stack does not need to take the qdisc lock to protect operations. This
> can be used to build lockless scheduling algorithms and to improve
> performance.
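Presumably a qdisc with lockless ->enqueue()/->dequeue() paths opts in from
its init path, along these lines (the qdisc name below is only for
illustration, not part of this patch):

	static int foo_init(struct Qdisc *sch, struct nlattr *opt)
	{
		/* Tell the stack it may skip the root lock for this qdisc. */
		sch->flags |= TCQ_F_NOLOCK;
		return 0;
	}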
> 
> The flag is checked in the tx path and the qdisc lock is only taken
> if it is not set. For now a plain conditional is used; later, if it
> proves worthwhile, we could be more aggressive and use a static key
> or wrap the test in a likely().
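On the static key idea, a rough sketch of what that could look like with
the jump label API; the key and helper names are made up here, and the code
that flips the key when the first lockless qdisc is attached is omitted:

	/* Default off: patched to a taken branch only once a TCQ_F_NOLOCK
	 * qdisc exists somewhere in the system.
	 */
	DEFINE_STATIC_KEY_FALSE(qdisc_nolock_in_use);

	static inline bool qdisc_is_lockless(const struct Qdisc *q)
	{
		return static_branch_unlikely(&qdisc_nolock_in_use) &&
		       (q->flags & TCQ_F_NOLOCK);
	}

__dev_xmit_skb() and net_tx_action() would then call qdisc_is_lockless(q)
instead of reading q->flags directly.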
> 
> The lockless case also drops the TCQ_F_CAN_BYPASS logic. The reason
> for this is that synchronizing a qlen counter across threads proved
> to cost more than doing the enqueue/dequeue operations themselves
> when tested with pktgen.
> 
> Signed-off-by: John Fastabend <john.r.fastabend@...el.com>
> ---
>  include/net/sch_generic.h |    1 +
>  net/core/dev.c            |   26 ++++++++++++++++++++++----
>  net/sched/sch_generic.c   |   24 ++++++++++++++++--------
>  3 files changed, 39 insertions(+), 12 deletions(-)
> 
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 909aff2..3de6a8c 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -58,6 +58,7 @@ struct Qdisc {
>  #define TCQ_F_NOPARENT		0x40 /* root of its hierarchy :
>  				      * qdisc_tree_decrease_qlen() should stop.
>  				      */
> +#define TCQ_F_NOLOCK		0x80 /* qdisc does not require locking */
>  	u32			limit;
>  	const struct Qdisc_ops	*ops;
>  	struct qdisc_size_table	__rcu *stab;
> diff --git a/net/core/dev.c b/net/core/dev.c
> index bcd4cdd..a0effa6 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3076,6 +3076,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
>  	int rc;
>  
>  	qdisc_calculate_pkt_len(skb, q);
> +
> +	if (q->flags & TCQ_F_NOLOCK) {
> +		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
> +			__qdisc_drop(skb, &to_free);
> +			rc = NET_XMIT_DROP;
> +		} else {
> +			rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
> +			__qdisc_run(q);
> +		}
> +
> +		if (unlikely(to_free))
> +			kfree_skb_list(to_free);
> +		return rc;
> +	}
> +
>  	/*
>  	 * Heuristic to force contended enqueues to serialize on a
>  	 * separate lock before trying to get qdisc main lock.
> @@ -3898,19 +3913,22 @@ static void net_tx_action(struct softirq_action *h)
>  
>  		while (head) {
>  			struct Qdisc *q = head;
> -			spinlock_t *root_lock;
> +			spinlock_t *root_lock = NULL;
>  
>  			head = head->next_sched;
>  
> -			root_lock = qdisc_lock(q);
> -			spin_lock(root_lock);
> +			if (!(q->flags & TCQ_F_NOLOCK)) {
> +				root_lock = qdisc_lock(q);
> +				spin_lock(root_lock);
> +			}
>  			/* We need to make sure head->next_sched is read
>  			 * before clearing __QDISC_STATE_SCHED
>  			 */
>  			smp_mb__before_atomic();
>  			clear_bit(__QDISC_STATE_SCHED, &q->state);
>  			qdisc_run(q);
> -			spin_unlock(root_lock);
> +			if (!(q->flags & TCQ_F_NOLOCK))

It might be faster to use:
	if (root_lock)
here (one less memory read and one less mask operation).
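i.e. the tail of that loop could then read (just a sketch, relying on the
root_lock = NULL initialization above):

			smp_mb__before_atomic();
			clear_bit(__QDISC_STATE_SCHED, &q->state);
			qdisc_run(q);
			if (root_lock)
				spin_unlock(root_lock);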

> +				spin_unlock(root_lock);
>  		}
>  	}
>  }
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index e305a55..af32418 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -170,7 +170,8 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
>  	int ret = NETDEV_TX_BUSY;
>  
>  	/* And release qdisc */
> -	spin_unlock(root_lock);
> +	if (!(q->flags & TCQ_F_NOLOCK))
> +		spin_unlock(root_lock);

You could use the same trick here, provided the callers pass root_lock as
NULL for a lockless qdisc.
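A rough sketch of how sch_direct_xmit() would then read (callers would pass
NULL instead of qdisc_lock(q) when TCQ_F_NOLOCK is set):

	/* And release qdisc; root_lock is NULL for a lockless qdisc */
	if (root_lock)
		spin_unlock(root_lock);

	/* ... validate skb, HARD_TX_LOCK, dev_hard_start_xmit() ... */

	if (root_lock)
		spin_lock(root_lock);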
>  
>  	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
>  	if (validate)
> @@ -183,10 +184,13 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
>  
>  		HARD_TX_UNLOCK(dev, txq);
>  	} else {
> -		spin_lock(root_lock);
> +		if (!(q->flags & TCQ_F_NOLOCK))
> +			spin_lock(root_lock);
>  		return qdisc_qlen(q);
>  	}
> -	spin_lock(root_lock);
> +
> +	if (!(q->flags & TCQ_F_NOLOCK))
> +		spin_lock(root_lock);
>  
>  	if (dev_xmit_complete(ret)) {
>  		/* Driver sent out skb successfully or skb was consumed */
> @@ -866,14 +870,18 @@ static bool some_qdisc_is_busy(struct net_device *dev)
>  
>  		dev_queue = netdev_get_tx_queue(dev, i);
>  		q = dev_queue->qdisc_sleeping;
> -		root_lock = qdisc_lock(q);
>  
> -		spin_lock_bh(root_lock);
> +		if (q->flags & TCQ_F_NOLOCK) {
> +			val = test_bit(__QDISC_STATE_SCHED, &q->state);
> +		} else {
> +			root_lock = qdisc_lock(q);
> +			spin_lock_bh(root_lock);
>  
> -		val = (qdisc_is_running(q) ||
> -		       test_bit(__QDISC_STATE_SCHED, &q->state));
> +			val = (qdisc_is_running(q) ||
> +			       test_bit(__QDISC_STATE_SCHED, &q->state));
>  
> -		spin_unlock_bh(root_lock);
> +			spin_unlock_bh(root_lock);
> +		}
>  
>  		if (val)
>  			return true;
> 

