lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 2 Nov 2017 13:14:53 -0700
From:   Florian Fainelli <f.fainelli@...il.com>
To:     Miquel Raynal <miquel.raynal@...e-electrons.com>,
        "David S . Miller" <davem@...emloft.net>
Cc:     Thomas Petazzoni <thomas.petazzoni@...e-electrons.com>,
        Antoine Tenart <antoine.tenart@...e-electrons.com>,
        Gregory Clement <gregory.clement@...e-electrons.com>,
        Nadav Haklai <nadavh@...vell.com>, netdev@...r.kernel.org,
        Stefan Chulski <stefanc@...vell.com>
Subject: Re: [PATCH] net: mvpp2: add ethtool GOP statistics

On 11/02/2017 11:52 AM, Miquel Raynal wrote:
> Add ethtool statistics support by reading the GOP statistics from the
> hardware counters. Also implement a workqueue to gather the statistics
> every second or some 32-bit counters could overflow.
> 
> Suggested-by: Stefan Chulski <stefanc@...vell.com>
> Signed-off-by: Miquel Raynal <miquel.raynal@...e-electrons.com>
> ---
>  drivers/net/ethernet/marvell/mvpp2.c | 226 ++++++++++++++++++++++++++++++++++-
>  1 file changed, 220 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
> index 97efe4733661..fb92a0927116 100644
> --- a/drivers/net/ethernet/marvell/mvpp2.c
> +++ b/drivers/net/ethernet/marvell/mvpp2.c
> @@ -769,6 +769,44 @@ enum mvpp2_bm_type {
>  	MVPP2_BM_SWF_SHORT
>  };
>  
> +/* GMAC MIB Counters register definitions */
> +#define MVPP21_MIB_COUNTERS_OFFSET		0x1000
> +#define MVPP21_MIB_COUNTERS_PORT_SZ		0x400
> +#define MVPP22_MIB_COUNTERS_OFFSET		0x0
> +#define MVPP22_MIB_COUNTERS_PORT_SZ		0x100
> +
> +#define MVPP2_MIB_GOOD_OCTETS_RCVD_LOW		0x0
> +#define MVPP2_MIB_GOOD_OCTETS_RCVD_HIGH		0x4
> +#define MVPP2_MIB_BAD_OCTETS_RCVD		0x8
> +#define MVPP2_MIB_CRC_ERRORS_SENT		0xc
> +#define MVPP2_MIB_UNICAST_FRAMES_RCVD		0x10
> +#define MVPP2_MIB_BROADCAST_FRAMES_RCVD		0x18
> +#define MVPP2_MIB_MULTICAST_FRAMES_RCVD		0x1c
> +#define MVPP2_MIB_FRAMES_64_OCTETS		0x20
> +#define MVPP2_MIB_FRAMES_65_TO_127_OCTETS	0x24
> +#define MVPP2_MIB_FRAMES_128_TO_255_OCTETS	0x28
> +#define MVPP2_MIB_FRAMES_256_TO_511_OCTETS	0x2c
> +#define MVPP2_MIB_FRAMES_512_TO_1023_OCTETS	0x30
> +#define MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS	0x34
> +#define MVPP2_MIB_GOOD_OCTETS_SENT_LOW		0x38
> +#define MVPP2_MIB_GOOD_OCTETS_SENT_HIGH		0x3c
> +#define MVPP2_MIB_UNICAST_FRAMES_SENT		0x40
> +#define MVPP2_MIB_MULTICAST_FRAMES_SENT		0x48
> +#define MVPP2_MIB_BROADCAST_FRAMES_SENT		0x4c
> +#define MVPP2_MIB_FC_SENT			0x54
> +#define MVPP2_MIB_FC_RCVD			0x58
> +#define MVPP2_MIB_RX_FIFO_OVERRUN		0x5c
> +#define MVPP2_MIB_UNDERSIZE_RCVD		0x60
> +#define MVPP2_MIB_FRAGMENTS_RCVD		0x64
> +#define MVPP2_MIB_OVERSIZE_RCVD			0x68
> +#define MVPP2_MIB_JABBER_RCVD			0x6c
> +#define MVPP2_MIB_MAC_RCV_ERROR			0x70
> +#define MVPP2_MIB_BAD_CRC_EVENT			0x74
> +#define MVPP2_MIB_COLLISION			0x78
> +#define MVPP2_MIB_LATE_COLLISION		0x7c
> +
> +#define MVPP2_MIB_COUNTERS_STATS_DELAY		(1 * HZ)
> +
>  /* Definitions */
>  
>  /* Shared Packet Processor resources */
> @@ -796,6 +834,7 @@ struct mvpp2 {
>  	struct clk *axi_clk;
>  
>  	/* List of pointers to port structures */
> +	int port_count;
>  	struct mvpp2_port **port_list;
>  
>  	/* Aggregated TXQs */
> @@ -817,6 +856,10 @@ struct mvpp2 {
>  
>  	/* Maximum number of RXQs per port */
>  	unsigned int max_port_rxqs;
> +
> +	/* Workqueue to gather hardware statistics */
> +	struct delayed_work stats_work;
> +	struct workqueue_struct *stats_queue;
>  };
>  
>  struct mvpp2_pcpu_stats {
> @@ -879,6 +922,7 @@ struct mvpp2_port {
>  	u16 tx_ring_size;
>  	u16 rx_ring_size;
>  	struct mvpp2_pcpu_stats __percpu *stats;
> +	u64 *ethtool_stats;
>  
>  	phy_interface_t phy_interface;
>  	struct device_node *phy_node;
> @@ -4743,9 +4787,137 @@ static void mvpp2_port_loopback_set(struct mvpp2_port *port)
>  	writel(val, port->base + MVPP2_GMAC_CTRL_1_REG);
>  }
>  
> +static u64 mvpp2_read_count(struct mvpp2_port *port, unsigned int offset)
> +{
> +	bool reg_is_64b =
> +		(offset == MVPP2_MIB_GOOD_OCTETS_RCVD_LOW) ||
> +		(offset == MVPP2_MIB_GOOD_OCTETS_SENT_LOW);

This does not scale very well, put that in your statistics structure and
define a member "reg_is_64b" there such that you can pass a pointer to
one of these members here, and check, on per-counter basis whether this
is needed or not.

> +	void __iomem *base;
> +	u64 val;
> +
> +	if (port->priv->hw_version == MVPP21)
> +		base = port->priv->lms_base + MVPP21_MIB_COUNTERS_OFFSET +
> +		       port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ;
> +	else
> +		base = port->priv->iface_base + MVPP22_MIB_COUNTERS_OFFSET +
> +		       port->gop_id * MVPP22_MIB_COUNTERS_PORT_SZ;
> +
> +	val = readl(base + offset);
> +	if (reg_is_64b)
> +		val += (u64)readl(base + offset + 4) << 32;

So the value gets latched when the higher part gets read last?

> +
> +	return val;
> +}
> +
> +struct mvpp2_ethtool_statistics {
> +	unsigned int offset;
> +	const char string[ETH_GSTRING_LEN];

Add your reg_is_64b member here too.

> +};
> +
> +/* Due to the fact that software statistics and hardware statistics are, by
> + * design, incremented at different moments in the chain of packet processing,
> + * it is very likely that incoming packets could have been dropped after being
> + * counted by hardware but before reaching software statistics (most probably
> + * multicast packets), and in the oppposite way, during transmission, FCS bytes
> + * are added in between as well as TSO skb will be split and header bytes added.
> + */

OK, not sure what to make of that comment.

> +static struct mvpp2_ethtool_statistics mvpp2_ethtool_stats[] = {
> +	{ MVPP2_MIB_GOOD_OCTETS_RCVD_LOW, "good_octets_received" },
> +	{ MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" },
> +	{ MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" },
> +	{ MVPP2_MIB_UNICAST_FRAMES_RCVD, "unicast_frames_received" },
> +	{ MVPP2_MIB_BROADCAST_FRAMES_RCVD, "broadcast_frames_received" },
> +	{ MVPP2_MIB_MULTICAST_FRAMES_RCVD, "multicast_frames_received" },
> +	{ MVPP2_MIB_FRAMES_64_OCTETS, "frames_64_octets" },
> +	{ MVPP2_MIB_FRAMES_65_TO_127_OCTETS, "frames_65_to_127_octet" },
> +	{ MVPP2_MIB_FRAMES_128_TO_255_OCTETS, "frames_128_to_255_octet" },
> +	{ MVPP2_MIB_FRAMES_256_TO_511_OCTETS, "frames_256_to_511_octet" },
> +	{ MVPP2_MIB_FRAMES_512_TO_1023_OCTETS, "frames_512_to_1023_octet" },
> +	{ MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS, "frames_1024_to_max_octet" },
> +	{ MVPP2_MIB_GOOD_OCTETS_SENT_LOW, "good_octets_sent" },
> +	{ MVPP2_MIB_UNICAST_FRAMES_SENT, "unicast_frames_sent" },
> +	{ MVPP2_MIB_MULTICAST_FRAMES_SENT, "multicast_frames_sent" },
> +	{ MVPP2_MIB_BROADCAST_FRAMES_SENT, "broadcast_frames_sent" },
> +	{ MVPP2_MIB_FC_SENT, "fc_sent" },
> +	{ MVPP2_MIB_FC_RCVD, "fc_received" },
> +	{ MVPP2_MIB_RX_FIFO_OVERRUN, "rx_fifo_overrun" },
> +	{ MVPP2_MIB_UNDERSIZE_RCVD, "undersize_received" },
> +	{ MVPP2_MIB_FRAGMENTS_RCVD, "fragments_received" },
> +	{ MVPP2_MIB_OVERSIZE_RCVD, "oversize_received" },
> +	{ MVPP2_MIB_JABBER_RCVD, "jabber_received" },
> +	{ MVPP2_MIB_MAC_RCV_ERROR, "mac_receive_error" },
> +	{ MVPP2_MIB_BAD_CRC_EVENT, "bad_crc_event" },
> +	{ MVPP2_MIB_COLLISION, "collision" },
> +	{ MVPP2_MIB_LATE_COLLISION, "late_collision" },
> +};
> +
> +static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset,
> +				      u8 *data)
> +{
> +	if (sset == ETH_SS_STATS) {
> +		int i;
> +
> +		for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_stats); i++)
> +			memcpy(data + i * ETH_GSTRING_LEN,
> +			       &mvpp2_ethtool_stats[i].string, ETH_GSTRING_LEN);
> +	}
> +}
> +
> +static void mvpp2_gather_hw_statistics(struct work_struct *work)
> +{
> +	struct delayed_work *del_work = to_delayed_work(work);
> +	struct mvpp2 *priv = container_of(del_work, struct mvpp2, stats_work);
> +	struct mvpp2_port *port;
> +	u64 *pstats;
> +	int i, j;
> +
> +	for (i = 0; i < priv->port_count; i++) {
> +		if (!priv->port_list[i])
> +			continue;
> +
> +		port = priv->port_list[i];
> +		pstats = port->ethtool_stats;

port->ethtool_stats was allocated this way:

port->ethtool_stats = kzalloc(sizeof(mvpp2_ethtool_stats)) instead of
ARRAY_SIZE(mvpp2_ethtool_stats) * sizeof(u64)

This is probably working right now because mvpp2_ethtool_stats is much
bigger than ARRAY_SIZE(mvpp2_ethtool_stats) * sizeof(u64).


> +		for (j = 0; j < ARRAY_SIZE(mvpp2_ethtool_stats); j++)
> +			*pstats++ += mvpp2_read_count(
> +				port, mvpp2_ethtool_stats[j].offset);

You might want to look into the helper functions from
include/linux/u64_stats_sync.h to safely add a 32-bit quantity to a
64-bit quantity on 32-bit hosts.

> +	}
> +
> +	/* No need to read again the counters right after this function if it
> +	 * was called asynchronously by the user (ie. use of ethtool).
> +	 */
> +	cancel_delayed_work(&priv->stats_work);
> +	queue_delayed_work(priv->stats_queue, &priv->stats_work,
> +			   MVPP2_MIB_COUNTERS_STATS_DELAY);
> +}
> +
> +static void mvpp2_ethtool_get_stats(struct net_device *dev,
> +				    struct ethtool_stats *stats, u64 *data)
> +{
> +	struct mvpp2_port *port = netdev_priv(dev);
> +
> +	/* Update statistics for all ports, copy only those actually needed */
> +	mvpp2_gather_hw_statistics(&port->priv->stats_work.work);
> +
> +	memcpy(data, port->ethtool_stats,
> +	       sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_stats));
> +}
> +
> +static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset)
> +{
> +	if (sset == ETH_SS_STATS)
> +		return ARRAY_SIZE(mvpp2_ethtool_stats);
> +
> +	return -EOPNOTSUPP;
> +}
> +
>  static void mvpp2_port_reset(struct mvpp2_port *port)
>  {
>  	u32 val;
> +	int i;

unsigned int i

> +
> +	/* Read the GOP statistics to reset the hardware counters */
> +	for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_stats); i++)
> +		mvpp2_read_count(port, mvpp2_ethtool_stats[i].offset);
>  
>  	val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
>  		    ~MVPP2_GMAC_PORT_RESET_MASK;
> @@ -7199,6 +7371,9 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
>  	.get_drvinfo	= mvpp2_ethtool_get_drvinfo,
>  	.get_ringparam	= mvpp2_ethtool_get_ringparam,
>  	.set_ringparam	= mvpp2_ethtool_set_ringparam,
> +	.get_strings	= mvpp2_ethtool_get_strings,
> +	.get_ethtool_stats = mvpp2_ethtool_get_stats,
> +	.get_sset_count	= mvpp2_ethtool_get_sset_count,
>  	.get_link_ksettings = phy_ethtool_get_link_ksettings,
>  	.set_link_ksettings = phy_ethtool_set_link_ksettings,
>  };
> @@ -7613,13 +7788,19 @@ static int mvpp2_port_probe(struct platform_device *pdev,
>  		port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id);
>  	}
>  
> -	/* Alloc per-cpu stats */
> +	/* Alloc per-cpu and ethtool stats */
>  	port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats);
>  	if (!port->stats) {
>  		err = -ENOMEM;
>  		goto err_free_irq;
>  	}
>  
> +	port->ethtool_stats = kzalloc(sizeof(mvpp2_ethtool_stats), GFP_KERNEL);
> +	if (!port->ethtool_stats) {
> +		err = -ENOMEM;
> +		goto err_free_stats;
> +	}

Should not the above be kcalloc(sizeof(u64),
ARRAY_SIZE(mvpp2_ethtool_stats), GFP_KERNEL)? That is, an array of
ARRAY_SIZE(mvpp2_ethtool_stats) elements, each sizeof(u64) bytes wide?

> +
>  	mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from);
>  
>  	port->tx_ring_size = MVPP2_MAX_TXD;
> @@ -7629,7 +7810,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
>  	err = mvpp2_port_init(port);
>  	if (err < 0) {
>  		dev_err(&pdev->dev, "failed to init port %d\n", id);
> -		goto err_free_stats;
> +		goto err_free_ethstats;
>  	}
>  
>  	mvpp2_port_periodic_xon_disable(port);
> @@ -7685,6 +7866,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
>  err_free_txq_pcpu:
>  	for (i = 0; i < port->ntxqs; i++)
>  		free_percpu(port->txqs[i]->pcpu);
> +err_free_ethstats:
> +	kfree(port->ethtool_stats);
>  err_free_stats:
>  	free_percpu(port->stats);
>  err_free_irq:
> @@ -7707,6 +7890,7 @@ static void mvpp2_port_remove(struct mvpp2_port *port)
>  	of_node_put(port->phy_node);
>  	free_percpu(port->pcpu);
>  	free_percpu(port->stats);
> +	kfree(port->ethtool_stats);
>  	for (i = 0; i < port->ntxqs; i++)
>  		free_percpu(port->txqs[i]->pcpu);
>  	mvpp2_queue_vectors_deinit(port);
> @@ -7893,7 +8077,7 @@ static int mvpp2_probe(struct platform_device *pdev)
>  	struct mvpp2 *priv;
>  	struct resource *res;
>  	void __iomem *base;
> -	int port_count, i;
> +	int i;
>  	int err;
>  
>  	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
> @@ -8008,14 +8192,14 @@ static int mvpp2_probe(struct platform_device *pdev)
>  		goto err_mg_clk;
>  	}
>  
> -	port_count = of_get_available_child_count(dn);
> -	if (port_count == 0) {
> +	priv->port_count = of_get_available_child_count(dn);
> +	if (priv->port_count == 0) {
>  		dev_err(&pdev->dev, "no ports enabled\n");
>  		err = -ENODEV;
>  		goto err_mg_clk;
>  	}
>  
> -	priv->port_list = devm_kcalloc(&pdev->dev, port_count,
> +	priv->port_list = devm_kcalloc(&pdev->dev, priv->port_count,
>  				       sizeof(*priv->port_list),
>  				       GFP_KERNEL);>  	if (!priv->port_list) {
> @@ -8023,6 +8207,20 @@ static int mvpp2_probe(struct platform_device *pdev)
>  		goto err_mg_clk;
>  	}
>  
> +	/* Statistics must be gathered regularly because some of them (like
> +	 * packets counters) are 32-bit registers and could overflow quite
> +	 * quickly. For instance, a 10Gb link used at full bandwidth with the
> +	 * smallest packets (64B) will overflow a 32-bit counter in less than
> +	 * 30 seconds. Then, use a workqueue to fill 64-bit counters.
> +	 */
> +	priv->stats_queue = create_singlethread_workqueue("mvpp2_hw_stats");
> +	if (!priv->stats_queue) {
> +		err = -ENOMEM;
> +		goto err_mg_clk;
> +	}

If I have multiple of these network devices in my system, it would be
nice to have an unique identifier after mvpp22_hw_stats to help
differentiate them (and possibly change their scheduling priorities),
how about using "mvpp2_hw_stats/%d"?

> +
> +	INIT_DELAYED_WORK(&priv->stats_work, mvpp2_gather_hw_statistics);
> +
>  	/* Initialize ports */
>  	i = 0;
>  	for_each_available_child_of_node(dn, port_node) {
> @@ -8033,6 +8231,10 @@ static int mvpp2_probe(struct platform_device *pdev)
>  	}
>  
>  	platform_set_drvdata(pdev, priv);
> +
> +	queue_delayed_work(priv->stats_queue, &priv->stats_work,
> +			   MVPP2_MIB_COUNTERS_STATS_DELAY);

If the network interface is not used (ndo_open is not called) we have
this workqueue running for nothing because the statistics should not
even increase, and this is just creating unnecessary system activity for
nothing.

> +
>  	return 0;
>  
>  err_mg_clk:
> @@ -8053,6 +8255,18 @@ static int mvpp2_remove(struct platform_device *pdev)
>  	struct device_node *port_node;
>  	int i = 0;
>  
> +	/* This work recall himself within a delay. If the cancellation returned
> +	 * a non-zero value, it means a work is still running. In that case, use
> +	 * use the flush (returns when the running work will be done) and cancel
> +	 * the new work that was just submitted to the queue but not started yet
> +	 * due to the delay.
> +	 */
> +	if (!cancel_delayed_work(&priv->stats_work)) {
> +		flush_workqueue(priv->stats_queue);
> +		cancel_delayed_work(&priv->stats_work);
> +	}

Similarly, this needs to be moved to the ndo_stop() function.

> +	destroy_workqueue(priv->stats_queue);
> +
>  	for_each_available_child_of_node(dn, port_node) {
>  		if (priv->port_list[i])
>  			mvpp2_port_remove(priv->port_list[i]);
> 


-- 
Florian

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ