lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <58becbf1-baea-a679-ed32-b58fa5fd24bd@mellanox.com>
Date:   Tue, 27 Mar 2018 00:22:58 +0300
From:   Tal Gilboa <talgi@...lanox.com>
To:     Florian Fainelli <f.fainelli@...il.com>, netdev@...r.kernel.org
Cc:     davem@...emloft.net, jaedon.shin@...il.com, pgynther@...gle.com,
        opendmb@...il.com, michal.chan@...adcom.com, gospo@...adcom.com,
        saeedm@...lanox.com
Subject: Re: [PATCH net-next 1/2] net: systemport: Implement adaptive
 interrupt coalescing

On 3/23/2018 4:19 AM, Florian Fainelli wrote:
> Implement support for adaptive RX and TX interrupt coalescing using
> net_dim. We have each of our TX ring and our single RX ring implement a
> bcm_sysport_net_dim structure which holds an interrupt counter, number
> of packets, bytes, and a container for a net_dim instance.
> 
> Signed-off-by: Florian Fainelli <f.fainelli@...il.com>
> ---
>   drivers/net/ethernet/broadcom/bcmsysport.c | 141 ++++++++++++++++++++++++++---
>   drivers/net/ethernet/broadcom/bcmsysport.h |  14 +++
>   2 files changed, 140 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
> index f15a8fc6dfc9..5a5a726bafa4 100644
> --- a/drivers/net/ethernet/broadcom/bcmsysport.c
> +++ b/drivers/net/ethernet/broadcom/bcmsysport.c
> @@ -15,6 +15,7 @@
>   #include <linux/module.h>
>   #include <linux/kernel.h>
>   #include <linux/netdevice.h>
> +#include <linux/net_dim.h>

I don't think you need this include. You already include net_dim in 
bcmsysport.h and include the bcmsysport.h here.

>   #include <linux/etherdevice.h>
>   #include <linux/platform_device.h>
>   #include <linux/of.h>
> @@ -574,21 +575,55 @@ static int bcm_sysport_set_wol(struct net_device *dev,
>   	return 0;
>   }
>   
> +static void bcm_sysport_set_rx_coalesce(struct bcm_sysport_priv *priv)
> +{
> +	u32 reg;
> +
> +	reg = rdma_readl(priv, RDMA_MBDONE_INTR);
> +	reg &= ~(RDMA_INTR_THRESH_MASK |
> +		 RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
> +	reg |= priv->dim.coal_pkts;
> +	reg |= DIV_ROUND_UP(priv->dim.coal_usecs * 1000, 8192) <<
> +			    RDMA_TIMEOUT_SHIFT;
> +	rdma_writel(priv, reg, RDMA_MBDONE_INTR);
> +}
> +
> +static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring *ring)
> +{
> +	struct bcm_sysport_priv *priv = ring->priv;
> +	u32 reg;
> +
> +	reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(ring->index));
> +	reg &= ~(RING_INTR_THRESH_MASK |
> +		 RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
> +	reg |= ring->dim.coal_pkts;
> +	reg |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192) <<
> +			    RING_TIMEOUT_SHIFT;
> +	tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(ring->index));
> +}
> +

I wouldn't couple these functions with dim. This implies dim is always 
used. IMO, would be more clear to use a generic method which takes usecs 
and packets as an argument.

>   static int bcm_sysport_get_coalesce(struct net_device *dev,
>   				    struct ethtool_coalesce *ec)
>   {
>   	struct bcm_sysport_priv *priv = netdev_priv(dev);
> +	struct bcm_sysport_tx_ring *ring;
> +	unsigned int i;
>   	u32 reg;
>   
>   	reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(0));
>   
>   	ec->tx_coalesce_usecs = (reg >> RING_TIMEOUT_SHIFT) * 8192 / 1000;
>   	ec->tx_max_coalesced_frames = reg & RING_INTR_THRESH_MASK;
> +	for (i = 0; i < dev->num_tx_queues; i++) {
> +		ring = &priv->tx_rings[i];
> +		ec->use_adaptive_tx_coalesce |= ring->dim.use_dim;
> +	}
>   
>   	reg = rdma_readl(priv, RDMA_MBDONE_INTR);
>   
>   	ec->rx_coalesce_usecs = (reg >> RDMA_TIMEOUT_SHIFT) * 8192 / 1000;
>   	ec->rx_max_coalesced_frames = reg & RDMA_INTR_THRESH_MASK;
> +	ec->use_adaptive_rx_coalesce = priv->dim.use_dim;
>   
>   	return 0;
>   }
> @@ -597,8 +632,8 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
>   				    struct ethtool_coalesce *ec)
>   {
>   	struct bcm_sysport_priv *priv = netdev_priv(dev);
> +	struct bcm_sysport_tx_ring *ring;
>   	unsigned int i;
> -	u32 reg;
>   
>   	/* Base system clock is 125Mhz, DMA timeout is this reference clock
>   	 * divided by 1024, which yield roughly 8.192 us, our maximum value has
> @@ -615,22 +650,26 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
>   		return -EINVAL;
>   
>   	for (i = 0; i < dev->num_tx_queues; i++) {
> -		reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(i));
> -		reg &= ~(RING_INTR_THRESH_MASK |
> -			 RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
> -		reg |= ec->tx_max_coalesced_frames;
> -		reg |= DIV_ROUND_UP(ec->tx_coalesce_usecs * 1000, 8192) <<
> -			 RING_TIMEOUT_SHIFT;
> -		tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(i));
> +		ring = &priv->tx_rings[i];
> +		ring->dim.coal_pkts = ec->tx_max_coalesced_frames;
> +		ring->dim.coal_usecs = ec->tx_coalesce_usecs;
> +		if (!ec->use_adaptive_tx_coalesce && ring->dim.use_dim) {
> +			ring->dim.coal_pkts = 1;
> +			ring->dim.coal_usecs = 0;
> +		}
> +		ring->dim.use_dim = ec->use_adaptive_tx_coalesce;
> +		bcm_sysport_set_tx_coalesce(ring);
>   	}

If I understand correctly, if I disable dim, moderation is set to 
{usecs,packets}={0,1} regardless of the input from ethtool right? 
Doesn't this break the wanted behavior? As mentioned above, I would 
decouple dim from the set_tx/rx_coalesce() function. Also, when dim is 
enabled, why change dim.coal_pkts/usecs? They would just be overwritten 
in the next iteration of net_dim.

>   
> -	reg = rdma_readl(priv, RDMA_MBDONE_INTR);
> -	reg &= ~(RDMA_INTR_THRESH_MASK |
> -		 RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
> -	reg |= ec->rx_max_coalesced_frames;
> -	reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192) <<
> -			    RDMA_TIMEOUT_SHIFT;
> -	rdma_writel(priv, reg, RDMA_MBDONE_INTR);
> +	priv->dim.coal_usecs = ec->rx_coalesce_usecs;
> +	priv->dim.coal_pkts = ec->rx_max_coalesced_frames;
> +
> +	if (!ec->use_adaptive_rx_coalesce && priv->dim.use_dim) {
> +		priv->dim.coal_pkts = 1;
> +		priv->dim.coal_usecs = 0;
> +	}
> +	priv->dim.use_dim = ec->use_adaptive_rx_coalesce;
> +	bcm_sysport_set_rx_coalesce(priv);

Same comment as above.

>   
>   	return 0;
>   }
> @@ -709,6 +748,7 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
>   	struct bcm_sysport_stats64 *stats64 = &priv->stats64;
>   	struct net_device *ndev = priv->netdev;
>   	unsigned int processed = 0, to_process;
> +	unsigned int processed_bytes = 0;
>   	struct bcm_sysport_cb *cb;
>   	struct sk_buff *skb;
>   	unsigned int p_index;
> @@ -800,6 +840,7 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
>   		 */
>   		skb_pull(skb, sizeof(*rsb) + 2);
>   		len -= (sizeof(*rsb) + 2);
> +		processed_bytes += len;
>   
>   		/* UniMAC may forward CRC */
>   		if (priv->crc_fwd) {
> @@ -824,6 +865,9 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
>   			priv->rx_read_ptr = 0;
>   	}
>   
> +	priv->dim.packets = processed;
> +	priv->dim.bytes = processed_bytes;
> +
>   	return processed;
>   }
>   
> @@ -900,6 +944,8 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
>   	ring->packets += pkts_compl;
>   	ring->bytes += bytes_compl;
>   	u64_stats_update_end(&priv->syncp);
> +	ring->dim.packets = pkts_compl;
> +	ring->dim.bytes = bytes_compl;
>   
>   	ring->c_index = c_index;
>   
> @@ -945,6 +991,7 @@ static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
>   {
>   	struct bcm_sysport_tx_ring *ring =
>   		container_of(napi, struct bcm_sysport_tx_ring, napi);
> +	struct net_dim_sample dim_sample;
>   	unsigned int work_done = 0;
>   
>   	work_done = bcm_sysport_tx_reclaim(ring->priv, ring);
> @@ -961,6 +1008,12 @@ static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
>   		return 0;
>   	}
>   
> +	if (ring->dim.use_dim) {
> +		net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
> +			       ring->dim.bytes, &dim_sample);
> +		net_dim(&ring->dim.dim, dim_sample);
> +	}
> +
>   	return budget;
>   }
>   
> @@ -976,6 +1029,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
>   {
>   	struct bcm_sysport_priv *priv =
>   		container_of(napi, struct bcm_sysport_priv, napi);
> +	struct net_dim_sample dim_sample;
>   	unsigned int work_done = 0;
>   
>   	work_done = bcm_sysport_desc_rx(priv, budget);
> @@ -998,6 +1052,12 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
>   		intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE);
>   	}
>   
> +	if (priv->dim.use_dim) {
> +		net_dim_sample(priv->dim.event_ctr, priv->dim.packets,
> +			       priv->dim.bytes, &dim_sample);
> +		net_dim(&priv->dim.dim, dim_sample);
> +	}
> +
>   	return work_done;
>   }
>   
> @@ -1016,6 +1076,40 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
>   	netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n");
>   }
>   
> +static void bcm_sysport_dim_work(struct work_struct *work)
> +{
> +	struct net_dim *dim = container_of(work, struct net_dim, work);
> +	struct bcm_sysport_net_dim *ndim =
> +			container_of(dim, struct bcm_sysport_net_dim, dim);
> +	struct bcm_sysport_priv *priv =
> +			container_of(ndim, struct bcm_sysport_priv, dim);
> +	struct net_dim_cq_moder cur_profile =
> +				net_dim_get_profile(dim->mode, dim->profile_ix);
> +
> +	priv->dim.coal_usecs = cur_profile.usec;
> +	priv->dim.coal_pkts = cur_profile.pkts;
> +
> +	bcm_sysport_set_rx_coalesce(priv);
> +	dim->state = NET_DIM_START_MEASURE;
> +}
> +
> +static void bcm_sysport_dim_tx_work(struct work_struct *work)
> +{
> +	struct net_dim *dim = container_of(work, struct net_dim, work);
> +	struct bcm_sysport_net_dim *ndim =
> +			container_of(dim, struct bcm_sysport_net_dim, dim);
> +	struct bcm_sysport_tx_ring *ring =
> +			container_of(ndim, struct bcm_sysport_tx_ring, dim);
> +	struct net_dim_cq_moder cur_profile =
> +				net_dim_get_profile(dim->mode, dim->profile_ix);
> +
> +	ring->dim.coal_usecs = cur_profile.usec;
> +	ring->dim.coal_pkts = cur_profile.pkts;
> +
> +	bcm_sysport_set_tx_coalesce(ring);
> +	dim->state = NET_DIM_START_MEASURE;
> +}
> +
>   /* RX and misc interrupt routine */
>   static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
>   {
> @@ -1034,6 +1128,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
>   	}
>   
>   	if (priv->irq0_stat & INTRL2_0_RDMA_MBDONE) {
> +		priv->dim.event_ctr++;
>   		if (likely(napi_schedule_prep(&priv->napi))) {
>   			/* disable RX interrupts */
>   			intrl2_0_mask_set(priv, INTRL2_0_RDMA_MBDONE);
> @@ -1061,6 +1156,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
>   			continue;
>   
>   		txr = &priv->tx_rings[ring];
> +		txr->dim.event_ctr++;
>   
>   		if (likely(napi_schedule_prep(&txr->napi))) {
>   			intrl2_0_mask_set(priv, ring_bit);
> @@ -1093,6 +1189,7 @@ static irqreturn_t bcm_sysport_tx_isr(int irq, void *dev_id)
>   			continue;
>   
>   		txr = &priv->tx_rings[ring];
> +		txr->dim.event_ctr++;
>   
>   		if (likely(napi_schedule_prep(&txr->napi))) {
>   			intrl2_1_mask_set(priv, BIT(ring));
> @@ -1358,6 +1455,16 @@ static void bcm_sysport_adj_link(struct net_device *dev)
>   		phy_print_status(phydev);
>   }
>   
> +static void bcm_sysport_init_dim(struct bcm_sysport_net_dim *dim,
> +				 void (*cb)(struct work_struct *work))
> +{
> +	INIT_WORK(&dim->dim.work, cb);
> +	dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
> +	dim->event_ctr = 0;
> +	dim->packets = 0;
> +	dim->bytes = 0;
> +}

What about default values for coal_usecs/pkts? dim supports it through 
net_dim_get_def_profile(mode) function.

> +
>   static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
>   				    unsigned int index)
>   {
> @@ -1447,6 +1554,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
>   	reg |= (1 << index);
>   	tdma_writel(priv, reg, TDMA_TIER1_ARB_0_QUEUE_EN);
>   
> +	bcm_sysport_init_dim(&ring->dim, bcm_sysport_dim_tx_work);
>   	napi_enable(&ring->napi);
>   
>   	netif_dbg(priv, hw, priv->netdev,
> @@ -1477,6 +1585,7 @@ static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv,
>   		return;
>   
>   	napi_disable(&ring->napi);
> +	cancel_work_sync(&ring->dim.dim.work);
>   	netif_napi_del(&ring->napi);
>   
>   	bcm_sysport_tx_clean(priv, ring);
> @@ -1766,6 +1875,7 @@ static void bcm_sysport_netif_start(struct net_device *dev)
>   	struct bcm_sysport_priv *priv = netdev_priv(dev);
>   
>   	/* Enable NAPI */
> +	bcm_sysport_init_dim(&priv->dim, bcm_sysport_dim_work);
>   	napi_enable(&priv->napi);
>   
>   	/* Enable RX interrupt and TX ring full interrupt */
> @@ -1951,6 +2061,7 @@ static void bcm_sysport_netif_stop(struct net_device *dev)
>   	/* stop all software from updating hardware */
>   	netif_tx_stop_all_queues(dev);
>   	napi_disable(&priv->napi);
> +	cancel_work_sync(&priv->dim.dim.work);
>   	phy_stop(dev->phydev);
>   
>   	/* mask all interrupts */
> diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
> index f5a984c1c986..9f48ad3cc38d 100644
> --- a/drivers/net/ethernet/broadcom/bcmsysport.h
> +++ b/drivers/net/ethernet/broadcom/bcmsysport.h
> @@ -12,6 +12,7 @@
>   #define __BCM_SYSPORT_H
>   
>   #include <linux/if_vlan.h>
> +#include <linux/net_dim.h>
>   
>   /* Receive/transmit descriptor format */
>   #define DESC_ADDR_HI_STATUS_LEN	0x00
> @@ -695,6 +696,16 @@ struct bcm_sysport_hw_params {
>   	unsigned int	num_rx_desc_words;
>   };
>   
> +struct bcm_sysport_net_dim {
> +	u16			use_dim;
> +	u16			event_ctr;
> +	unsigned long		packets;
> +	unsigned long		bytes;
> +	u32			coal_usecs;
> +	u32			coal_pkts;
> +	struct net_dim		dim;
> +};
> +
>   /* Software view of the TX ring */
>   struct bcm_sysport_tx_ring {
>   	spinlock_t	lock;		/* Ring lock for tx reclaim/xmit */
> @@ -712,6 +723,7 @@ struct bcm_sysport_tx_ring {
>   	struct bcm_sysport_priv *priv;	/* private context backpointer */
>   	unsigned long	packets;	/* packets statistics */
>   	unsigned long	bytes;		/* bytes statistics */
> +	struct bcm_sysport_net_dim dim;	/* Net DIM context */
>   	unsigned int	switch_queue;	/* switch port queue number */
>   	unsigned int	switch_port;	/* switch port queue number */
>   	bool		inspect;	/* inspect switch port and queue */
> @@ -743,6 +755,8 @@ struct bcm_sysport_priv {
>   	unsigned int		rx_read_ptr;
>   	unsigned int		rx_c_index;
>   
> +	struct bcm_sysport_net_dim	dim;
> +
>   	/* PHY device */
>   	struct device_node	*phy_dn;
>   	phy_interface_t		phy_interface;
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ