lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <024e7fb3-9b53-f4f2-3901-38a63c47d76c@huawei.com>
Date:   Thu, 21 Sep 2017 19:19:06 +0800
From:   Yunsheng Lin <linyunsheng@...wei.com>
To:     Igor Russkikh <igor.russkikh@...antia.com>,
        "David S . Miller" <davem@...emloft.net>
CC:     <netdev@...r.kernel.org>, David Arcari <darcari@...hat.com>,
        Pavel Belous <Pavel.Belous@...antia.com>,
        Nadezhda Krupnina <Nadezhda.Krupnina@...antia.com>,
        Simon Edelhaus <simon.edelhaus@...antia.com>
Subject: Re: [PATCH net 2/4] net:ethernet:aquantia: Fix Tx queue hangups

Hi, Igor

On 2017/9/21 18:53, Igor Russkikh wrote:
> Driver did a poor job in managing its Tx queues: Sometimes it could stop
> tx queues due to link down condition in aq_nic_xmit - but never waked up
> them. That led to Tx path total suspend.
> This patch fixes this and improves generic queue management:
> - introduces queue restart counter
> - uses generic netif_ interface to disable and enable tx path
> - refactors link up/down condition and introduces dmesg log event when
>   link changes.
> - introduces new constant for minimum descriptors count required for queue
>   wakeup
> 
> Signed-off-by: Pavel Belous <Pavel.Belous@...antia.com>
> Signed-off-by: Igor Russkikh <igor.russkikh@...antia.com>
> ---
>  drivers/net/ethernet/aquantia/atlantic/aq_cfg.h  |  4 ++
>  drivers/net/ethernet/aquantia/atlantic/aq_nic.c  | 91 +++++++++++-------------
>  drivers/net/ethernet/aquantia/atlantic/aq_nic.h  |  2 -
>  drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 26 +++++++
>  drivers/net/ethernet/aquantia/atlantic/aq_ring.h |  4 ++
>  drivers/net/ethernet/aquantia/atlantic/aq_vec.c  |  8 +--
>  6 files changed, 76 insertions(+), 59 deletions(-)
> 
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
> index 2149864..0fdaaa6 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
> @@ -51,6 +51,10 @@
>  
>  #define AQ_CFG_SKB_FRAGS_MAX   32U
>  
> +/* Number of descriptors available in one ring to resume this ring queue
> + */
> +#define AQ_CFG_RESTART_DESC_THRES   (AQ_CFG_SKB_FRAGS_MAX * 2)
> +
>  #define AQ_CFG_NAPI_WEIGHT     64U
>  
>  #define AQ_CFG_MULTICAST_ADDRESS_MAX     32U
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
> index f281392..24f573c 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
> @@ -119,6 +119,35 @@ int aq_nic_cfg_start(struct aq_nic_s *self)
>  	return 0;
>  }
>  
> +static int aq_nic_update_link_status(struct aq_nic_s *self)
> +{
> +	int err = self->aq_hw_ops.hw_get_link_status(self->aq_hw);
> +
> +	if (err < 0)
> +		return -1;


why not just return err?

> +
> +	if (self->link_status.mbps != self->aq_hw->aq_link_status.mbps)
> +		pr_info("%s: link change old %d new %d\n",
> +			AQ_CFG_DRV_NAME, self->link_status.mbps,
> +			self->aq_hw->aq_link_status.mbps);

You has ndev in struct aq_nic_s *self, why not use netdev_*?


> +
> +	self->link_status = self->aq_hw->aq_link_status;
> +	if (!netif_carrier_ok(self->ndev) && self->link_status.mbps) {
> +		aq_utils_obj_set(&self->header.flags,
> +				 AQ_NIC_FLAG_STARTED);
> +		aq_utils_obj_clear(&self->header.flags,
> +				   AQ_NIC_LINK_DOWN);
> +		netif_carrier_on(self->ndev);
> +		netif_tx_wake_all_queues(self->ndev);
> +	}
> +	if (netif_carrier_ok(self->ndev) && !self->link_status.mbps) {
> +		netif_carrier_off(self->ndev);
> +		netif_tx_disable(self->ndev);
> +		aq_utils_obj_set(&self->header.flags, AQ_NIC_LINK_DOWN);
> +	}
> +	return 0;
> +}
> +
>  static void aq_nic_service_timer_cb(unsigned long param)
>  {
>  	struct aq_nic_s *self = (struct aq_nic_s *)param;
> @@ -131,26 +160,13 @@ static void aq_nic_service_timer_cb(unsigned long param)
>  	if (aq_utils_obj_test(&self->header.flags, AQ_NIC_FLAGS_IS_NOT_READY))
>  		goto err_exit;
>  
> -	err = self->aq_hw_ops.hw_get_link_status(self->aq_hw);
> -	if (err < 0)
> +	err = aq_nic_update_link_status(self);
> +	if (err)
>  		goto err_exit;
>  
> -	self->link_status = self->aq_hw->aq_link_status;
> -
>  	self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw,
>  		    self->aq_nic_cfg.is_interrupt_moderation);
>  
> -	if (self->link_status.mbps) {
> -		aq_utils_obj_set(&self->header.flags,
> -				 AQ_NIC_FLAG_STARTED);
> -		aq_utils_obj_clear(&self->header.flags,
> -				   AQ_NIC_LINK_DOWN);
> -		netif_carrier_on(self->ndev);
> -	} else {
> -		netif_carrier_off(self->ndev);
> -		aq_utils_obj_set(&self->header.flags, AQ_NIC_LINK_DOWN);
> -	}
> -
>  	memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s));
>  	memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s));
>  	for (i = AQ_DIMOF(self->aq_vec); i--;) {
> @@ -240,7 +256,6 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
>  int aq_nic_ndev_register(struct aq_nic_s *self)
>  {
>  	int err = 0;
> -	unsigned int i = 0U;
>  
>  	if (!self->ndev) {
>  		err = -EINVAL;
> @@ -262,8 +277,7 @@ int aq_nic_ndev_register(struct aq_nic_s *self)
>  
>  	netif_carrier_off(self->ndev);
>  
> -	for (i = AQ_CFG_VECS_MAX; i--;)
> -		aq_nic_ndev_queue_stop(self, i);
> +	netif_tx_disable(self->ndev);
>  
>  	err = register_netdev(self->ndev);
>  	if (err < 0)
> @@ -319,12 +333,8 @@ struct aq_nic_s *aq_nic_alloc_hot(struct net_device *ndev)
>  		err = -EINVAL;
>  		goto err_exit;
>  	}
> -	if (netif_running(ndev)) {
> -		unsigned int i;
> -
> -		for (i = AQ_CFG_VECS_MAX; i--;)
> -			netif_stop_subqueue(ndev, i);
> -	}
> +	if (netif_running(ndev))
> +		netif_tx_disable(ndev);
>  
>  	for (self->aq_vecs = 0; self->aq_vecs < self->aq_nic_cfg.vecs;
>  		self->aq_vecs++) {
> @@ -384,16 +394,6 @@ int aq_nic_init(struct aq_nic_s *self)
>  	return err;
>  }
>  
> -void aq_nic_ndev_queue_start(struct aq_nic_s *self, unsigned int idx)
> -{
> -	netif_start_subqueue(self->ndev, idx);
> -}
> -
> -void aq_nic_ndev_queue_stop(struct aq_nic_s *self, unsigned int idx)
> -{
> -	netif_stop_subqueue(self->ndev, idx);
> -}
> -
>  int aq_nic_start(struct aq_nic_s *self)
>  {
>  	struct aq_vec_s *aq_vec = NULL;
> @@ -452,10 +452,6 @@ int aq_nic_start(struct aq_nic_s *self)
>  			goto err_exit;
>  	}
>  
> -	for (i = 0U, aq_vec = self->aq_vec[0];
> -		self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
> -		aq_nic_ndev_queue_start(self, i);
> -
>  	err = netif_set_real_num_tx_queues(self->ndev, self->aq_vecs);
>  	if (err < 0)
>  		goto err_exit;
> @@ -464,6 +460,8 @@ int aq_nic_start(struct aq_nic_s *self)
>  	if (err < 0)
>  		goto err_exit;
>  
> +	netif_tx_start_all_queues(self->ndev);
> +
>  err_exit:
>  	return err;
>  }
> @@ -603,7 +601,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
>  	unsigned int vec = skb->queue_mapping % self->aq_nic_cfg.vecs;
>  	unsigned int tc = 0U;
>  	int err = NETDEV_TX_OK;
> -	bool is_nic_in_bad_state;
>  
>  	frags = skb_shinfo(skb)->nr_frags + 1;
>  
> @@ -614,13 +611,10 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
>  		goto err_exit;
>  	}
>  
> -	is_nic_in_bad_state = aq_utils_obj_test(&self->header.flags,
> -						AQ_NIC_FLAGS_IS_NOT_TX_READY) ||
> -						(aq_ring_avail_dx(ring) <
> -						AQ_CFG_SKB_FRAGS_MAX);
> +	aq_ring_update_queue_state(ring);
>  
> -	if (is_nic_in_bad_state) {
> -		aq_nic_ndev_queue_stop(self, ring->idx);
> +	/* Above status update may stop the queue. Check this. */
> +	if (__netif_subqueue_stopped(self->ndev, ring->idx)) {
>  		err = NETDEV_TX_BUSY;
>  		goto err_exit;
>  	}
> @@ -632,9 +626,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
>  						      ring,
>  						      frags);
>  		if (err >= 0) {
> -			if (aq_ring_avail_dx(ring) < AQ_CFG_SKB_FRAGS_MAX + 1)
> -				aq_nic_ndev_queue_stop(self, ring->idx);
> -
>  			++ring->stats.tx.packets;
>  			ring->stats.tx.bytes += skb->len;
>  		}
> @@ -906,9 +897,7 @@ int aq_nic_stop(struct aq_nic_s *self)
>  	struct aq_vec_s *aq_vec = NULL;
>  	unsigned int i = 0U;
>  
> -	for (i = 0U, aq_vec = self->aq_vec[0];
> -		self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
> -		aq_nic_ndev_queue_stop(self, i);
> +	netif_tx_disable(self->ndev);
>  
>  	del_timer_sync(&self->service_timer);
>  
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
> index 7fc2a5e..0ddd556 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
> @@ -83,8 +83,6 @@ struct net_device *aq_nic_get_ndev(struct aq_nic_s *self);
>  int aq_nic_init(struct aq_nic_s *self);
>  int aq_nic_cfg_start(struct aq_nic_s *self);
>  int aq_nic_ndev_register(struct aq_nic_s *self);
> -void aq_nic_ndev_queue_start(struct aq_nic_s *self, unsigned int idx);
> -void aq_nic_ndev_queue_stop(struct aq_nic_s *self, unsigned int idx);
>  void aq_nic_ndev_free(struct aq_nic_s *self);
>  int aq_nic_start(struct aq_nic_s *self);
>  int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb);
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> index 4eee199..02f79b0 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
> @@ -104,6 +104,32 @@ int aq_ring_init(struct aq_ring_s *self)
>  	return 0;
>  }
>  
> +void aq_ring_update_queue_state(struct aq_ring_s *ring)
> +{
> +	if (aq_ring_avail_dx(ring) <= AQ_CFG_SKB_FRAGS_MAX)
> +		aq_ring_queue_stop(ring);
> +	else if (aq_ring_avail_dx(ring) > AQ_CFG_RESTART_DESC_THRES)
> +		aq_ring_queue_wake(ring);
> +}
> +
> +void aq_ring_queue_wake(struct aq_ring_s *ring)
> +{
> +	struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic);
> +
> +	if (__netif_subqueue_stopped(ndev, ring->idx)) {
> +		netif_wake_subqueue(ndev, ring->idx);
> +		ring->stats.tx.queue_restarts++;
> +	}
> +}
> +
> +void aq_ring_queue_stop(struct aq_ring_s *ring)
> +{
> +	struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic);
> +
> +	if (!__netif_subqueue_stopped(ndev, ring->idx))
> +		netif_stop_subqueue(ndev, ring->idx);
> +}
> +
>  void aq_ring_tx_clean(struct aq_ring_s *self)
>  {
>  	struct device *dev = aq_nic_get_dev(self->aq_nic);
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
> index 782176c..24523b5 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
> @@ -94,6 +94,7 @@ struct aq_ring_stats_tx_s {
>  	u64 errors;
>  	u64 packets;
>  	u64 bytes;
> +	u64 queue_restarts;
>  };
>  
>  union aq_ring_stats_s {
> @@ -147,6 +148,9 @@ struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self,
>  int aq_ring_init(struct aq_ring_s *self);
>  void aq_ring_rx_deinit(struct aq_ring_s *self);
>  void aq_ring_free(struct aq_ring_s *self);
> +void aq_ring_update_queue_state(struct aq_ring_s *ring);
> +void aq_ring_queue_wake(struct aq_ring_s *ring);
> +void aq_ring_queue_stop(struct aq_ring_s *ring);
>  void aq_ring_tx_clean(struct aq_ring_s *self);
>  int aq_ring_rx_clean(struct aq_ring_s *self,
>  		     struct napi_struct *napi,
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
> index ebf5880..305ff8f 100644
> --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
> @@ -59,12 +59,7 @@ static int aq_vec_poll(struct napi_struct *napi, int budget)
>  			if (ring[AQ_VEC_TX_ID].sw_head !=
>  			    ring[AQ_VEC_TX_ID].hw_head) {
>  				aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]);
> -
> -				if (aq_ring_avail_dx(&ring[AQ_VEC_TX_ID]) >
> -				    AQ_CFG_SKB_FRAGS_MAX) {
> -					aq_nic_ndev_queue_start(self->aq_nic,
> -						ring[AQ_VEC_TX_ID].idx);
> -				}
> +				aq_ring_update_queue_state(&ring[AQ_VEC_TX_ID]);
>  				was_tx_cleaned = true;
>  			}
>  
> @@ -364,6 +359,7 @@ void aq_vec_add_stats(struct aq_vec_s *self,
>  		stats_tx->packets += tx->packets;
>  		stats_tx->bytes += tx->bytes;
>  		stats_tx->errors += tx->errors;
> +		stats_tx->queue_restarts += tx->queue_restarts;
>  	}
>  }
>  
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ